{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1.准备工作：设置driver"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 266,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-266-346f04cdf752>:18: DeprecationWarning: use options instead of chrome_options\n",
      "  driver = webdriver.Chrome( chrome_options = opts) #desired_capabilities=caps,\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import time\n",
    "from requests_html import HTMLSession\n",
    "from lxml.html import fromstring\n",
    "from random import random\n",
    "from selenium import webdriver\n",
    "from selenium.webdriver.common.desired_capabilities import DesiredCapabilities\n",
    "\n",
    "opts = webdriver.ChromeOptions()\n",
    "opts.add_argument('--no-sandbox')#解决DevToolsActivePort文件不存在的报错\n",
    "opts.add_argument('window-size=1920x3000') #指定浏览器分辨率\n",
    "opts.add_argument('--disable-gpu') #谷歌文档提到需要加上一这个属性来规避bug\n",
    "opts.add_argument('--hide-scrollbars') #隐藏滚动条, 应对些特殊页面\n",
    "out_path = r'D:\\数据挖掘pdf'  # 是你想指定的路径\n",
    "prefs = {'profile.default_content_settings.popups': 0, 'download.default_directory': out_path}\n",
    "opts.add_experimental_option('prefs', prefs)\n",
    "driver = webdriver.Chrome( chrome_options = opts) #desired_capabilities=caps,"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 267,
   "metadata": {},
   "outputs": [],
   "source": [
    "from selenium.webdriver.support.wait import WebDriverWait\n",
    "from selenium.webdriver.support import expected_conditions as EC\n",
    "from selenium.webdriver.common.by import By\n",
    "from time import sleep\n",
    "import requests\n",
    "import time\n",
    "import hashlib\n",
    "import base64\n",
    "import json\n",
    "import os\n",
    "from PIL import Image "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 打开CNKI\n",
    "* 1. 校园网，自动登录  cnki.net\n",
    "* 2. 校外网，需要登录  fsso.cnki.net"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 268,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 进入知网\n",
    "driver.get(\"https://www.cnki.net/\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 269,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 点击IP登录（因自动登录省略）\n",
    "# driver.find_element_by_xpath('/html/body/div[1]/div[1]/div/div/div/div[1]/div/div/div[4]/a').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 270,
   "metadata": {},
   "outputs": [],
   "source": [
    "# driver.find_element_by_xpath('/html/body/form/div[4]/div/div/div[9]/a[2]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 271,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'中山大学南...'"
      ]
     },
     "execution_count": 271,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 检查登陆状况（是否是中山大学南方学院登录\n",
    "element = driver.find_element_by_id('Ecp_loginShowName1')\n",
    "element.get_attribute('innerHTML')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 高级检索"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 272,
   "metadata": {},
   "outputs": [],
   "source": [
    "element = driver.find_element_by_id('highSearch')\n",
    "element.get_attribute('innerHTML')\n",
    "element.click()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 检查窗口\n",
    "\n",
    "* 眼见不一定为实\n",
    "* 只要是多个窗口，请检查窗口位置信息\n",
    "* 窗口信息根据窗口加载的顺序按照列表存储，每一个窗口有唯一识别的窗口ID"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 273,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['CDwindow-21A00034E7171FEEA7C000AFB02314D6', 'CDwindow-7597892CF395BDC33E8BBED3D045389B']\n"
     ]
    }
   ],
   "source": [
    "# 当前窗口ID\n",
    "print (driver.window_handles)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 274,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['CDwindow-21A00034E7171FEEA7C000AFB02314D6',\n",
       " 'CDwindow-7597892CF395BDC33E8BBED3D045389B']"
      ]
     },
     "execution_count": 274,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 所有窗口ID\n",
    "driver.window_handles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 275,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-275-704fdf3805c1>:2: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[1])\n"
     ]
    }
   ],
   "source": [
    "# 切换窗口\n",
    "driver.switch_to_window(driver.window_handles[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 276,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 选择学术期刊\n",
    "driver.find_element_by_xpath('//li[@data-id=\"xsqk\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 277,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 点击专业检索\n",
    "driver.find_element_by_name('majorSearch').click()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 设置搜索query"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 278,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 设置query\n",
    "query = 'SU = \"平台\" AND  (TI =\"人工智能\" OR  TI =\"大数据\"  OR TI = \"AI\" OR TI = \"big data\")'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 279,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 输入关键词\n",
    "element = driver.find_element_by_xpath('//textarea')\n",
    "element.clear()\n",
    "element.send_keys(query)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 280,
   "metadata": {},
   "outputs": [],
   "source": [
    "element = driver.find_element_by_xpath('//input[@value=\"检索\"]')\n",
    "element.click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 281,
   "metadata": {},
   "outputs": [],
   "source": [
    "element = driver.find_element_by_xpath('/html/body/div[3]/div[1]/div/div/div/a[1]')\n",
    "element.click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "/html/body/div[3]/div[1]/div/div/div/a[1]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 选择期刊来源"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 282,
   "metadata": {},
   "outputs": [],
   "source": [
    "driver.find_element_by_xpath('//input[@key=\"CSI\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 283,
   "metadata": {},
   "outputs": [],
   "source": [
    "driver.find_element_by_xpath('//input[@key=\"HX\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 284,
   "metadata": {},
   "outputs": [],
   "source": [
    "## EI来源期刊（此部分不运行因为文章数量会不够）\n",
    "\n",
    "element = driver.find_element_by_xpath('//div[@class=\"extend-tit-labels\"]//input[@key=\"EI\"]')\n",
    "element.get_attribute('innerHTML')\n",
    "element.click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 285,
   "metadata": {},
   "outputs": [],
   "source": [
    "## SCI来源期刊（此部分不运行因为文章数量会不够）\n",
    "\n",
    "element = driver.find_element_by_xpath('//div[@class=\"extend-tit-labels\"]//input[@key=\"SI\"]')\n",
    "element.get_attribute('innerHTML')\n",
    "element.click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "## CSCD（此部分不运行因为文章数量会不够）\n",
    "\n",
    "element = driver.find_element_by_xpath('//div[@class=\"extend-tit-labels\"]//input[@key=\"CSD\"]')\n",
    "element.get_attribute('innerHTML')\n",
    "element.click()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 检查检索信息，更换页面文章数量"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 286,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 更换每页文章数量（20->50）\n",
    "element = driver.find_element_by_id('perPageDiv')\n",
    "element.click()\n",
    "#更换的区块显示出来才可以运行成功"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 287,
   "metadata": {},
   "outputs": [],
   "source": [
    "element = driver.find_element_by_xpath('//li[@data-val=\"50\"]/a')\n",
    "element.click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 288,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'共找到<em>17,056</em>条结果'"
      ]
     },
     "execution_count": 288,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "## 检索数量\n",
    "element = driver.find_element_by_xpath('//span[@class=\"pagerTitleCell\"]')\n",
    "element.get_attribute('innerHTML')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 抓取相关重要信息"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 289,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>篇名</th>\n",
       "      <th>作者</th>\n",
       "      <th>刊名</th>\n",
       "      <th>发表时间</th>\n",
       "      <th>被引</th>\n",
       "      <th>下载</th>\n",
       "      <th>操作</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>大数据背景下档案信息服务体系构建方法探析  网络首发</td>\n",
       "      <td>孔媛媛; 张舒; 王爱</td>\n",
       "      <td>档案与建设</td>\n",
       "      <td>2021-07-02 09:42</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>人工智能背景下企业内部审计优化的探讨</td>\n",
       "      <td>雷琼</td>\n",
       "      <td>中小企业管理与科技(中旬刊)</td>\n",
       "      <td>2021-07-02</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>大数据背景下的农村精准扶贫调查报告——以辽宁省阜蒙县为例</td>\n",
       "      <td>赵嘉宁; 包吉; 孟唤婷; 刘旸</td>\n",
       "      <td>中小企业管理与科技(中旬刊)</td>\n",
       "      <td>2021-07-02</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>大数据时代背景下的汽车服务营销及策略研究</td>\n",
       "      <td>韦金孜</td>\n",
       "      <td>中小企业管理与科技(中旬刊)</td>\n",
       "      <td>2021-07-02</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>基于金融大数据平台的用户数据生命周期管理的研究</td>\n",
       "      <td>崔超; 马卉宇</td>\n",
       "      <td>齐齐哈尔大学学报(自然科学版)</td>\n",
       "      <td>2021-06-30</td>\n",
       "      <td>NaN</td>\n",
       "      <td>20.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>平台视域下全国居民疫苗接种的认知、意愿及影响要素——基于五大互联网平台的舆情大数据分析  网络首发</td>\n",
       "      <td>喻国明; 杨雅; 陈雪娇</td>\n",
       "      <td>新闻界</td>\n",
       "      <td>2021-06-28 15:33</td>\n",
       "      <td>NaN</td>\n",
       "      <td>165.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>7</td>\n",
       "      <td>人工智能背景下我国人力资源服务业的SWOT分析及发展策略</td>\n",
       "      <td>赵根良</td>\n",
       "      <td>中小企业管理与科技(下旬刊)</td>\n",
       "      <td>2021-06-28</td>\n",
       "      <td>NaN</td>\n",
       "      <td>171.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>8</td>\n",
       "      <td>大数据技术下金融审计管理研究</td>\n",
       "      <td>黄友鑫</td>\n",
       "      <td>中小企业管理与科技(下旬刊)</td>\n",
       "      <td>2021-06-28</td>\n",
       "      <td>NaN</td>\n",
       "      <td>61.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>9</td>\n",
       "      <td>人工智能时代高校思想政治教育的嬗变与重构</td>\n",
       "      <td>雷铮</td>\n",
       "      <td>高校辅导员学刊</td>\n",
       "      <td>2021-06-28</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>10</td>\n",
       "      <td>时空大数据平台云计算及其典型应用服务探讨</td>\n",
       "      <td>沈凤娇; 余晓敏</td>\n",
       "      <td>地理空间信息</td>\n",
       "      <td>2021-06-28</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>11</td>\n",
       "      <td>大数据时代下事业单位财务管理的创新路径探索</td>\n",
       "      <td>廖小霞</td>\n",
       "      <td>财会学习</td>\n",
       "      <td>2021-06-25</td>\n",
       "      <td>NaN</td>\n",
       "      <td>32.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>12</td>\n",
       "      <td>基于人事信息系统的大型综合性医院“大人事”大数据管理</td>\n",
       "      <td>余淳;叶志宏;李章炳;白双;曾祥伦</td>\n",
       "      <td>中国循证医学杂志</td>\n",
       "      <td>2021-06-25</td>\n",
       "      <td>NaN</td>\n",
       "      <td>10.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>13</td>\n",
       "      <td>大数据时代高校思想政治教育模式的创新</td>\n",
       "      <td>陈琪</td>\n",
       "      <td>洛阳师范学院学报</td>\n",
       "      <td>2021-06-25</td>\n",
       "      <td>NaN</td>\n",
       "      <td>37.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>14</td>\n",
       "      <td>人工智能时代高校翻译技术实践环境建设研究</td>\n",
       "      <td>曹达钦; 戴钰涵</td>\n",
       "      <td>外语界</td>\n",
       "      <td>2021-06-25</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>15</td>\n",
       "      <td>浅谈大数据在新能源汽车领域的应用</td>\n",
       "      <td>杜志强</td>\n",
       "      <td>智能建筑与智慧城市</td>\n",
       "      <td>2021-06-24</td>\n",
       "      <td>NaN</td>\n",
       "      <td>228.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>16</td>\n",
       "      <td>基于ElasticSearch全文检索的农业地理信息大数据平台设计与实现</td>\n",
       "      <td>刘吉; 孙俊英; 陈忠超; 蔡忠亮</td>\n",
       "      <td>测绘与空间地理信息</td>\n",
       "      <td>2021-06-24</td>\n",
       "      <td>NaN</td>\n",
       "      <td>37.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>17</td>\n",
       "      <td>基于5G与AI的生态环境监测网络平台探讨</td>\n",
       "      <td>徐爱兰; 耿建生</td>\n",
       "      <td>环境监测管理与技术</td>\n",
       "      <td>2021-06-22</td>\n",
       "      <td>NaN</td>\n",
       "      <td>186.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>18</td>\n",
       "      <td>大数据背景下管理会计新探讨</td>\n",
       "      <td>林振刚</td>\n",
       "      <td>中小企业管理与科技(上旬刊)</td>\n",
       "      <td>2021-06-22</td>\n",
       "      <td>NaN</td>\n",
       "      <td>401.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>19</td>\n",
       "      <td>大数据时代会计信息化风险因素与防范对策</td>\n",
       "      <td>刘颖</td>\n",
       "      <td>中小企业管理与科技(上旬刊)</td>\n",
       "      <td>2021-06-22</td>\n",
       "      <td>NaN</td>\n",
       "      <td>196.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>20</td>\n",
       "      <td>基于大数据的煤矿综合业务管控平台开发  网络首发</td>\n",
       "      <td>张波;赵耀忠;刘跃;田文明;赵晋松</td>\n",
       "      <td>热力发电</td>\n",
       "      <td>2021-06-21 17:09</td>\n",
       "      <td>NaN</td>\n",
       "      <td>76.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>21</td>\n",
       "      <td>互联网大数据时代背景下大学生碎片化学习的策略研究</td>\n",
       "      <td>王妍; 王婉婷</td>\n",
       "      <td>中国信息化</td>\n",
       "      <td>2021-06-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>216.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>22</td>\n",
       "      <td>大数据融合分析应用的多角色探索</td>\n",
       "      <td>陈静</td>\n",
       "      <td>中国信息化</td>\n",
       "      <td>2021-06-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>133.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>23</td>\n",
       "      <td>基于大数据的智慧消防系统架构探讨</td>\n",
       "      <td>吴先洪; 车辉; 杨波; 邢慧芬; 樊玉琦</td>\n",
       "      <td>物联网技术</td>\n",
       "      <td>2021-06-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>49.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>24</td>\n",
       "      <td>军队院校物联网工程专业大数据实验室建设方案研究</td>\n",
       "      <td>刘国松; 万平; 李先利; 吴书金</td>\n",
       "      <td>物联网技术</td>\n",
       "      <td>2021-06-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>14.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>25</td>\n",
       "      <td>耳鼻喉科专病大数据平台的设计与实践</td>\n",
       "      <td>戴静娟; 胡萍; 陈名扬; 黄晓琴</td>\n",
       "      <td>中国卫生信息管理杂志</td>\n",
       "      <td>2021-06-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>17.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>26</td>\n",
       "      <td>大数据背景的电子商务商品实体识别算法</td>\n",
       "      <td>王玉玲</td>\n",
       "      <td>微型电脑应用</td>\n",
       "      <td>2021-06-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>61.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>27</td>\n",
       "      <td>基于分布式数据库的大数据平台动态页面数据生成技术</td>\n",
       "      <td>苏莉娜</td>\n",
       "      <td>微型电脑应用</td>\n",
       "      <td>2021-06-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>34.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>28</td>\n",
       "      <td>产教融合背景下高职院校大数据技术与应用专业人才培养的探索与实践</td>\n",
       "      <td>王雪松</td>\n",
       "      <td>科技风</td>\n",
       "      <td>2021-06-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>53.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>29</td>\n",
       "      <td>人工智能视域下影音档案修复与传播路径探讨——以百年前北京影像修复为例</td>\n",
       "      <td>周子晴</td>\n",
       "      <td>北京档案</td>\n",
       "      <td>2021-06-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>7.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>30</td>\n",
       "      <td>一种用于大数据的改进的ItemBased推荐算法</td>\n",
       "      <td>李洋; 黄树成</td>\n",
       "      <td>计算机与数字工程</td>\n",
       "      <td>2021-06-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>31</td>\n",
       "      <td>铜仁市农业大数据发展探讨</td>\n",
       "      <td>张军; 黄义君; 王海军; 万伟</td>\n",
       "      <td>现代农业科技</td>\n",
       "      <td>2021-06-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>32</td>\n",
       "      <td>大数据视域下公安院校网络思想政治教育创新研究</td>\n",
       "      <td>赵志强</td>\n",
       "      <td>福建警察学院学报</td>\n",
       "      <td>2021-06-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>33</td>\n",
       "      <td>大数据背景下暴恐事件的特点及应对策略研究</td>\n",
       "      <td>綦凤; 杨金召; 涂瀚文</td>\n",
       "      <td>河北公安警察职业学院学报</td>\n",
       "      <td>2021-06-18</td>\n",
       "      <td>NaN</td>\n",
       "      <td>27.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>34</td>\n",
       "      <td>大数据时代高校信息化教学资源共享平台构建探析</td>\n",
       "      <td>魏妍</td>\n",
       "      <td>电脑编程技巧与维护</td>\n",
       "      <td>2021-06-18</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>35</td>\n",
       "      <td>基于大数据的数字化电能计量误差分析  网络首发</td>\n",
       "      <td>田园; 张梅; 保富; 原野</td>\n",
       "      <td>电测与仪表</td>\n",
       "      <td>2021-06-17 10:38</td>\n",
       "      <td>NaN</td>\n",
       "      <td>69.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>36</td>\n",
       "      <td>大数据背景下食用菌企业精准营销路径研究</td>\n",
       "      <td>范文婷</td>\n",
       "      <td>北方经贸</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>92.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>37</td>\n",
       "      <td>大数据背景下基层党组织建设的困境与解决路径</td>\n",
       "      <td>周青</td>\n",
       "      <td>中小企业管理与科技(中旬刊)</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>49.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>38</td>\n",
       "      <td>大数据智能审计助推企业高质量发展</td>\n",
       "      <td>楚文光; 胡为民; 雷年桢; 陈赛霞</td>\n",
       "      <td>中国内部审计</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>198.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>39</td>\n",
       "      <td>从人工路巡到智能发现 图元视觉AI智能平台让城市管理更智慧</td>\n",
       "      <td>周兴</td>\n",
       "      <td>中国建设信息化</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>40</td>\n",
       "      <td>人工智能驾驶技术商业化分析</td>\n",
       "      <td>孙吴炎</td>\n",
       "      <td>办公自动化</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>82.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>41</td>\n",
       "      <td>大数据背景下基层党组织建设的困境与解决路径</td>\n",
       "      <td>周青</td>\n",
       "      <td>中小企业管理与科技(中旬刊)</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>49.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>42</td>\n",
       "      <td>人工智能驾驶技术商业化分析</td>\n",
       "      <td>孙吴炎</td>\n",
       "      <td>办公自动化</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>82.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>43</td>\n",
       "      <td>大数据智能审计助推企业高质量发展</td>\n",
       "      <td>楚文光; 胡为民; 雷年桢; 陈赛霞</td>\n",
       "      <td>中国内部审计</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>198.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>44</td>\n",
       "      <td>大数据背景下食用菌企业精准营销路径研究</td>\n",
       "      <td>范文婷</td>\n",
       "      <td>北方经贸</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>92.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>45</td>\n",
       "      <td>从人工路巡到智能发现 图元视觉AI智能平台让城市管理更智慧</td>\n",
       "      <td>周兴</td>\n",
       "      <td>中国建设信息化</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>46</td>\n",
       "      <td>快递100将推出包装碳排放大数据</td>\n",
       "      <td>NaN</td>\n",
       "      <td>中国包装</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>30.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>47</td>\n",
       "      <td>人工智能在图书馆的实施路径研究</td>\n",
       "      <td>侯志江</td>\n",
       "      <td>图书馆工作与研究</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>94.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>48</td>\n",
       "      <td>浅谈“AI+云”技术在现代农业的运用</td>\n",
       "      <td>杨永西; 罗岳文; 陈向生</td>\n",
       "      <td>现代化农业</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>39.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>49</td>\n",
       "      <td>基于大数据分析的在线教学有效性提升策略与实施路径</td>\n",
       "      <td>张汉萍</td>\n",
       "      <td>武汉职业技术学院学报</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>49.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>50</td>\n",
       "      <td>“互联网+人工智能”赋能下的国企党建创新</td>\n",
       "      <td>胡秀英</td>\n",
       "      <td>现代企业</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>17.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Unnamed: 0                                                 篇名  \\\n",
       "0            1                         大数据背景下档案信息服务体系构建方法探析  网络首发   \n",
       "1            2                                 人工智能背景下企业内部审计优化的探讨   \n",
       "2            3                       大数据背景下的农村精准扶贫调查报告——以辽宁省阜蒙县为例   \n",
       "3            4                               大数据时代背景下的汽车服务营销及策略研究   \n",
       "4            5                            基于金融大数据平台的用户数据生命周期管理的研究   \n",
       "5            6  平台视域下全国居民疫苗接种的认知、意愿及影响要素——基于五大互联网平台的舆情大数据分析  网络首发   \n",
       "6            7                       人工智能背景下我国人力资源服务业的SWOT分析及发展策略   \n",
       "7            8                                     大数据技术下金融审计管理研究   \n",
       "8            9                               人工智能时代高校思想政治教育的嬗变与重构   \n",
       "9           10                               时空大数据平台云计算及其典型应用服务探讨   \n",
       "10          11                              大数据时代下事业单位财务管理的创新路径探索   \n",
       "11          12                         基于人事信息系统的大型综合性医院“大人事”大数据管理   \n",
       "12          13                                 大数据时代高校思想政治教育模式的创新   \n",
       "13          14                               人工智能时代高校翻译技术实践环境建设研究   \n",
       "14          15                                   浅谈大数据在新能源汽车领域的应用   \n",
       "15          16               基于ElasticSearch全文检索的农业地理信息大数据平台设计与实现   \n",
       "16          17                               基于5G与AI的生态环境监测网络平台探讨   \n",
       "17          18                                      大数据背景下管理会计新探讨   \n",
       "18          19                                大数据时代会计信息化风险因素与防范对策   \n",
       "19          20                           基于大数据的煤矿综合业务管控平台开发  网络首发   \n",
       "20          21                           互联网大数据时代背景下大学生碎片化学习的策略研究   \n",
       "21          22                                    大数据融合分析应用的多角色探索   \n",
       "22          23                                   基于大数据的智慧消防系统架构探讨   \n",
       "23          24                            军队院校物联网工程专业大数据实验室建设方案研究   \n",
       "24          25                                  耳鼻喉科专病大数据平台的设计与实践   \n",
       "25          26                                 大数据背景的电子商务商品实体识别算法   \n",
       "26          27                           基于分布式数据库的大数据平台动态页面数据生成技术   \n",
       "27          28                    产教融合背景下高职院校大数据技术与应用专业人才培养的探索与实践   \n",
       "28          29                 人工智能视域下影音档案修复与传播路径探讨——以百年前北京影像修复为例   \n",
       "29          30                           一种用于大数据的改进的ItemBased推荐算法   \n",
       "30          31                                       铜仁市农业大数据发展探讨   \n",
       "31          32                             大数据视域下公安院校网络思想政治教育创新研究   \n",
       "32          33                               大数据背景下暴恐事件的特点及应对策略研究   \n",
       "33          34                             大数据时代高校信息化教学资源共享平台构建探析   \n",
       "34          35                            基于大数据的数字化电能计量误差分析  网络首发   \n",
       "35          36                                大数据背景下食用菌企业精准营销路径研究   \n",
       "36          37                              大数据背景下基层党组织建设的困境与解决路径   \n",
       "37          38                                   大数据智能审计助推企业高质量发展   \n",
       "38          39                      从人工路巡到智能发现 图元视觉AI智能平台让城市管理更智慧   \n",
       "39          40                                      人工智能驾驶技术商业化分析   \n",
       "40          41                              大数据背景下基层党组织建设的困境与解决路径   \n",
       "41          42                                      人工智能驾驶技术商业化分析   \n",
       "42          43                                   大数据智能审计助推企业高质量发展   \n",
       "43          44                                大数据背景下食用菌企业精准营销路径研究   \n",
       "44          45                      从人工路巡到智能发现 图元视觉AI智能平台让城市管理更智慧   \n",
       "45          46                                   快递100将推出包装碳排放大数据   \n",
       "46          47                                    人工智能在图书馆的实施路径研究   \n",
       "47          48                                 浅谈“AI+云”技术在现代农业的运用   \n",
       "48          49                           基于大数据分析的在线教学有效性提升策略与实施路径   \n",
       "49          50                               “互联网+人工智能”赋能下的国企党建创新   \n",
       "\n",
       "                       作者               刊名              发表时间  被引     下载  操作  \n",
       "0             孔媛媛; 张舒; 王爱            档案与建设  2021-07-02 09:42 NaN    NaN  下载  \n",
       "1                      雷琼   中小企业管理与科技(中旬刊)        2021-07-02 NaN    NaN  下载  \n",
       "2        赵嘉宁; 包吉; 孟唤婷; 刘旸   中小企业管理与科技(中旬刊)        2021-07-02 NaN    NaN  下载  \n",
       "3                     韦金孜   中小企业管理与科技(中旬刊)        2021-07-02 NaN    NaN  下载  \n",
       "4                 崔超; 马卉宇  齐齐哈尔大学学报(自然科学版)        2021-06-30 NaN   20.0  下载  \n",
       "5            喻国明; 杨雅; 陈雪娇              新闻界  2021-06-28 15:33 NaN  165.0  下载  \n",
       "6                     赵根良   中小企业管理与科技(下旬刊)        2021-06-28 NaN  171.0  下载  \n",
       "7                     黄友鑫   中小企业管理与科技(下旬刊)        2021-06-28 NaN   61.0  下载  \n",
       "8                      雷铮          高校辅导员学刊        2021-06-28 NaN    NaN  下载  \n",
       "9                沈凤娇; 余晓敏           地理空间信息        2021-06-28 NaN    NaN  下载  \n",
       "10                    廖小霞             财会学习        2021-06-25 NaN   32.0  下载  \n",
       "11      余淳;叶志宏;李章炳;白双;曾祥伦         中国循证医学杂志        2021-06-25 NaN   10.0  下载  \n",
       "12                     陈琪         洛阳师范学院学报        2021-06-25 NaN   37.0  下载  \n",
       "13               曹达钦; 戴钰涵              外语界        2021-06-25 NaN    NaN  下载  \n",
       "14                    杜志强        智能建筑与智慧城市        2021-06-24 NaN  228.0  下载  \n",
       "15      刘吉; 孙俊英; 陈忠超; 蔡忠亮        测绘与空间地理信息        2021-06-24 NaN   37.0  下载  \n",
       "16               徐爱兰; 耿建生        环境监测管理与技术        2021-06-22 NaN  186.0  下载  \n",
       "17                    林振刚   中小企业管理与科技(上旬刊)        2021-06-22 NaN  401.0  下载  \n",
       "18                     刘颖   中小企业管理与科技(上旬刊)        2021-06-22 NaN  196.0  下载  \n",
       "19      张波;赵耀忠;刘跃;田文明;赵晋松             热力发电  2021-06-21 17:09 NaN   76.0  下载  \n",
       "20                王妍; 王婉婷            中国信息化        2021-06-20 NaN  216.0  下载  \n",
       "21                     陈静            中国信息化        2021-06-20 NaN  133.0  下载  \n",
       "22  吴先洪; 车辉; 杨波; 邢慧芬; 樊玉琦            物联网技术        2021-06-20 NaN   49.0  下载  \n",
       "23      刘国松; 万平; 李先利; 吴书金            物联网技术        2021-06-20 NaN   14.0  下载  \n",
       "24      戴静娟; 胡萍; 陈名扬; 黄晓琴       中国卫生信息管理杂志        2021-06-20 NaN   17.0  下载  \n",
       "25                    王玉玲           微型电脑应用        2021-06-20 NaN   61.0  下载  \n",
       "26                    苏莉娜           微型电脑应用        2021-06-20 NaN   34.0  下载  \n",
       "27                    王雪松              科技风        2021-06-20 NaN   53.0  下载  \n",
       "28                    周子晴             北京档案        2021-06-20 NaN    7.0  下载  \n",
       "29                李洋; 黄树成         计算机与数字工程        2021-06-20 NaN    1.0  下载  \n",
       "30       张军; 黄义君; 王海军; 万伟           现代农业科技        2021-06-20 NaN    NaN  下载  \n",
       "31                    赵志强         福建警察学院学报        2021-06-20 NaN    NaN  下载  \n",
       "32           綦凤; 杨金召; 涂瀚文     河北公安警察职业学院学报        2021-06-18 NaN   27.0  下载  \n",
       "33                     魏妍        电脑编程技巧与维护        2021-06-18 NaN    4.0  下载  \n",
       "34         田园; 张梅; 保富; 原野            电测与仪表  2021-06-17 10:38 NaN   69.0  下载  \n",
       "35                    范文婷             北方经贸        2021-06-15 NaN   92.0  下载  \n",
       "36                     周青   中小企业管理与科技(中旬刊)        2021-06-15 NaN   49.0  下载  \n",
       "37     楚文光; 胡为民; 雷年桢; 陈赛霞           中国内部审计        2021-06-15 NaN  198.0  下载  \n",
       "38                     周兴          中国建设信息化        2021-06-15 NaN    9.0  下载  \n",
       "39                    孙吴炎            办公自动化        2021-06-15 NaN   82.0  下载  \n",
       "40                     周青   中小企业管理与科技(中旬刊)        2021-06-15 NaN   49.0  下载  \n",
       "41                    孙吴炎            办公自动化        2021-06-15 NaN   82.0  下载  \n",
       "42     楚文光; 胡为民; 雷年桢; 陈赛霞           中国内部审计        2021-06-15 NaN  198.0  下载  \n",
       "43                    范文婷             北方经贸        2021-06-15 NaN   92.0  下载  \n",
       "44                     周兴          中国建设信息化        2021-06-15 NaN    9.0  下载  \n",
       "45                    NaN             中国包装        2021-06-15 NaN   30.0  下载  \n",
       "46                    侯志江         图书馆工作与研究        2021-06-15 NaN   94.0  下载  \n",
       "47          杨永西; 罗岳文; 陈向生            现代化农业        2021-06-15 NaN   39.0  下载  \n",
       "48                    张汉萍       武汉职业技术学院学报        2021-06-15 NaN   49.0  下载  \n",
       "49                    胡秀英             现代企业        2021-06-15 NaN   17.0  下载  "
      ]
     },
     "execution_count": 289,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "element = driver.find_element_by_id('gridTable')\n",
    "page_html = element.get_attribute('innerHTML')\n",
    "首页主要数据 = pd.read_html(page_html)[0]\n",
    "\n",
    "首页主要数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 290,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 获取详细页链接\n",
    "import re\n",
    "\n",
    "html = driver.page_source\n",
    "root = fromstring(html)\n",
    "href = [x for x in root.xpath('.//*[@id=\"gridTable\"]/table/tbody/tr[1]/td[2]/a/@href')]\n",
    "\n",
    "def correct(href):\n",
    "    DbCode = re.findall(\"DbCode=(.*?)&\",href)[0]\n",
    "    dbname = re.findall(\"dbname=(.*?)&\",href)[0]\n",
    "    filename = re.findall(\"filename=(.*?)&\",href)[0]\n",
    "    href = f\"https://kns.cnki.net/kcms/detail/detail.aspx?dbcode={DbCode}&dbname={dbname}&filename={filename}\"\n",
    "    return href"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 291,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>篇名</th>\n",
       "      <th>作者</th>\n",
       "      <th>刊名</th>\n",
       "      <th>发表时间</th>\n",
       "      <th>被引</th>\n",
       "      <th>下载</th>\n",
       "      <th>操作</th>\n",
       "      <th>详细页链接</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>大数据背景下档案信息服务体系构建方法探析  网络首发</td>\n",
       "      <td>孔媛媛; 张舒; 王爱</td>\n",
       "      <td>档案与建设</td>\n",
       "      <td>2021-07-02 09:42</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>下载</td>\n",
       "      <td>https://kns.cnki.net/kcms/detail/detail.aspx?d...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>人工智能背景下企业内部审计优化的探讨</td>\n",
       "      <td>雷琼</td>\n",
       "      <td>中小企业管理与科技(中旬刊)</td>\n",
       "      <td>2021-07-02</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>大数据背景下的农村精准扶贫调查报告——以辽宁省阜蒙县为例</td>\n",
       "      <td>赵嘉宁; 包吉; 孟唤婷; 刘旸</td>\n",
       "      <td>中小企业管理与科技(中旬刊)</td>\n",
       "      <td>2021-07-02</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>大数据时代背景下的汽车服务营销及策略研究</td>\n",
       "      <td>韦金孜</td>\n",
       "      <td>中小企业管理与科技(中旬刊)</td>\n",
       "      <td>2021-07-02</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>基于金融大数据平台的用户数据生命周期管理的研究</td>\n",
       "      <td>崔超; 马卉宇</td>\n",
       "      <td>齐齐哈尔大学学报(自然科学版)</td>\n",
       "      <td>2021-06-30</td>\n",
       "      <td>NaN</td>\n",
       "      <td>20.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>平台视域下全国居民疫苗接种的认知、意愿及影响要素——基于五大互联网平台的舆情大数据分析  网络首发</td>\n",
       "      <td>喻国明; 杨雅; 陈雪娇</td>\n",
       "      <td>新闻界</td>\n",
       "      <td>2021-06-28 15:33</td>\n",
       "      <td>NaN</td>\n",
       "      <td>165.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>7</td>\n",
       "      <td>人工智能背景下我国人力资源服务业的SWOT分析及发展策略</td>\n",
       "      <td>赵根良</td>\n",
       "      <td>中小企业管理与科技(下旬刊)</td>\n",
       "      <td>2021-06-28</td>\n",
       "      <td>NaN</td>\n",
       "      <td>171.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>8</td>\n",
       "      <td>大数据技术下金融审计管理研究</td>\n",
       "      <td>黄友鑫</td>\n",
       "      <td>中小企业管理与科技(下旬刊)</td>\n",
       "      <td>2021-06-28</td>\n",
       "      <td>NaN</td>\n",
       "      <td>61.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>9</td>\n",
       "      <td>人工智能时代高校思想政治教育的嬗变与重构</td>\n",
       "      <td>雷铮</td>\n",
       "      <td>高校辅导员学刊</td>\n",
       "      <td>2021-06-28</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>10</td>\n",
       "      <td>时空大数据平台云计算及其典型应用服务探讨</td>\n",
       "      <td>沈凤娇; 余晓敏</td>\n",
       "      <td>地理空间信息</td>\n",
       "      <td>2021-06-28</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>11</td>\n",
       "      <td>大数据时代下事业单位财务管理的创新路径探索</td>\n",
       "      <td>廖小霞</td>\n",
       "      <td>财会学习</td>\n",
       "      <td>2021-06-25</td>\n",
       "      <td>NaN</td>\n",
       "      <td>32.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>12</td>\n",
       "      <td>基于人事信息系统的大型综合性医院“大人事”大数据管理</td>\n",
       "      <td>余淳;叶志宏;李章炳;白双;曾祥伦</td>\n",
       "      <td>中国循证医学杂志</td>\n",
       "      <td>2021-06-25</td>\n",
       "      <td>NaN</td>\n",
       "      <td>10.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>13</td>\n",
       "      <td>大数据时代高校思想政治教育模式的创新</td>\n",
       "      <td>陈琪</td>\n",
       "      <td>洛阳师范学院学报</td>\n",
       "      <td>2021-06-25</td>\n",
       "      <td>NaN</td>\n",
       "      <td>37.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>14</td>\n",
       "      <td>人工智能时代高校翻译技术实践环境建设研究</td>\n",
       "      <td>曹达钦; 戴钰涵</td>\n",
       "      <td>外语界</td>\n",
       "      <td>2021-06-25</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>15</td>\n",
       "      <td>浅谈大数据在新能源汽车领域的应用</td>\n",
       "      <td>杜志强</td>\n",
       "      <td>智能建筑与智慧城市</td>\n",
       "      <td>2021-06-24</td>\n",
       "      <td>NaN</td>\n",
       "      <td>228.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>16</td>\n",
       "      <td>基于ElasticSearch全文检索的农业地理信息大数据平台设计与实现</td>\n",
       "      <td>刘吉; 孙俊英; 陈忠超; 蔡忠亮</td>\n",
       "      <td>测绘与空间地理信息</td>\n",
       "      <td>2021-06-24</td>\n",
       "      <td>NaN</td>\n",
       "      <td>37.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>17</td>\n",
       "      <td>基于5G与AI的生态环境监测网络平台探讨</td>\n",
       "      <td>徐爱兰; 耿建生</td>\n",
       "      <td>环境监测管理与技术</td>\n",
       "      <td>2021-06-22</td>\n",
       "      <td>NaN</td>\n",
       "      <td>186.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>18</td>\n",
       "      <td>大数据背景下管理会计新探讨</td>\n",
       "      <td>林振刚</td>\n",
       "      <td>中小企业管理与科技(上旬刊)</td>\n",
       "      <td>2021-06-22</td>\n",
       "      <td>NaN</td>\n",
       "      <td>401.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>19</td>\n",
       "      <td>大数据时代会计信息化风险因素与防范对策</td>\n",
       "      <td>刘颖</td>\n",
       "      <td>中小企业管理与科技(上旬刊)</td>\n",
       "      <td>2021-06-22</td>\n",
       "      <td>NaN</td>\n",
       "      <td>196.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>20</td>\n",
       "      <td>基于大数据的煤矿综合业务管控平台开发  网络首发</td>\n",
       "      <td>张波;赵耀忠;刘跃;田文明;赵晋松</td>\n",
       "      <td>热力发电</td>\n",
       "      <td>2021-06-21 17:09</td>\n",
       "      <td>NaN</td>\n",
       "      <td>76.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>21</td>\n",
       "      <td>互联网大数据时代背景下大学生碎片化学习的策略研究</td>\n",
       "      <td>王妍; 王婉婷</td>\n",
       "      <td>中国信息化</td>\n",
       "      <td>2021-06-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>216.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>22</td>\n",
       "      <td>大数据融合分析应用的多角色探索</td>\n",
       "      <td>陈静</td>\n",
       "      <td>中国信息化</td>\n",
       "      <td>2021-06-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>133.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>23</td>\n",
       "      <td>基于大数据的智慧消防系统架构探讨</td>\n",
       "      <td>吴先洪; 车辉; 杨波; 邢慧芬; 樊玉琦</td>\n",
       "      <td>物联网技术</td>\n",
       "      <td>2021-06-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>49.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>24</td>\n",
       "      <td>军队院校物联网工程专业大数据实验室建设方案研究</td>\n",
       "      <td>刘国松; 万平; 李先利; 吴书金</td>\n",
       "      <td>物联网技术</td>\n",
       "      <td>2021-06-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>14.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>25</td>\n",
       "      <td>耳鼻喉科专病大数据平台的设计与实践</td>\n",
       "      <td>戴静娟; 胡萍; 陈名扬; 黄晓琴</td>\n",
       "      <td>中国卫生信息管理杂志</td>\n",
       "      <td>2021-06-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>17.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>26</td>\n",
       "      <td>大数据背景的电子商务商品实体识别算法</td>\n",
       "      <td>王玉玲</td>\n",
       "      <td>微型电脑应用</td>\n",
       "      <td>2021-06-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>61.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>27</td>\n",
       "      <td>基于分布式数据库的大数据平台动态页面数据生成技术</td>\n",
       "      <td>苏莉娜</td>\n",
       "      <td>微型电脑应用</td>\n",
       "      <td>2021-06-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>34.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>28</td>\n",
       "      <td>产教融合背景下高职院校大数据技术与应用专业人才培养的探索与实践</td>\n",
       "      <td>王雪松</td>\n",
       "      <td>科技风</td>\n",
       "      <td>2021-06-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>53.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>29</td>\n",
       "      <td>人工智能视域下影音档案修复与传播路径探讨——以百年前北京影像修复为例</td>\n",
       "      <td>周子晴</td>\n",
       "      <td>北京档案</td>\n",
       "      <td>2021-06-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>7.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>30</td>\n",
       "      <td>一种用于大数据的改进的ItemBased推荐算法</td>\n",
       "      <td>李洋; 黄树成</td>\n",
       "      <td>计算机与数字工程</td>\n",
       "      <td>2021-06-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>31</td>\n",
       "      <td>铜仁市农业大数据发展探讨</td>\n",
       "      <td>张军; 黄义君; 王海军; 万伟</td>\n",
       "      <td>现代农业科技</td>\n",
       "      <td>2021-06-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>32</td>\n",
       "      <td>大数据视域下公安院校网络思想政治教育创新研究</td>\n",
       "      <td>赵志强</td>\n",
       "      <td>福建警察学院学报</td>\n",
       "      <td>2021-06-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>33</td>\n",
       "      <td>大数据背景下暴恐事件的特点及应对策略研究</td>\n",
       "      <td>綦凤; 杨金召; 涂瀚文</td>\n",
       "      <td>河北公安警察职业学院学报</td>\n",
       "      <td>2021-06-18</td>\n",
       "      <td>NaN</td>\n",
       "      <td>27.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>34</td>\n",
       "      <td>大数据时代高校信息化教学资源共享平台构建探析</td>\n",
       "      <td>魏妍</td>\n",
       "      <td>电脑编程技巧与维护</td>\n",
       "      <td>2021-06-18</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>35</td>\n",
       "      <td>基于大数据的数字化电能计量误差分析  网络首发</td>\n",
       "      <td>田园; 张梅; 保富; 原野</td>\n",
       "      <td>电测与仪表</td>\n",
       "      <td>2021-06-17 10:38</td>\n",
       "      <td>NaN</td>\n",
       "      <td>69.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>36</td>\n",
       "      <td>大数据背景下食用菌企业精准营销路径研究</td>\n",
       "      <td>范文婷</td>\n",
       "      <td>北方经贸</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>92.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>37</td>\n",
       "      <td>大数据背景下基层党组织建设的困境与解决路径</td>\n",
       "      <td>周青</td>\n",
       "      <td>中小企业管理与科技(中旬刊)</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>49.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>38</td>\n",
       "      <td>大数据智能审计助推企业高质量发展</td>\n",
       "      <td>楚文光; 胡为民; 雷年桢; 陈赛霞</td>\n",
       "      <td>中国内部审计</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>198.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>39</td>\n",
       "      <td>从人工路巡到智能发现 图元视觉AI智能平台让城市管理更智慧</td>\n",
       "      <td>周兴</td>\n",
       "      <td>中国建设信息化</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>40</td>\n",
       "      <td>人工智能驾驶技术商业化分析</td>\n",
       "      <td>孙吴炎</td>\n",
       "      <td>办公自动化</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>82.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>41</td>\n",
       "      <td>大数据背景下基层党组织建设的困境与解决路径</td>\n",
       "      <td>周青</td>\n",
       "      <td>中小企业管理与科技(中旬刊)</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>49.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>42</td>\n",
       "      <td>人工智能驾驶技术商业化分析</td>\n",
       "      <td>孙吴炎</td>\n",
       "      <td>办公自动化</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>82.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>43</td>\n",
       "      <td>大数据智能审计助推企业高质量发展</td>\n",
       "      <td>楚文光; 胡为民; 雷年桢; 陈赛霞</td>\n",
       "      <td>中国内部审计</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>198.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>44</td>\n",
       "      <td>大数据背景下食用菌企业精准营销路径研究</td>\n",
       "      <td>范文婷</td>\n",
       "      <td>北方经贸</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>92.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>45</td>\n",
       "      <td>从人工路巡到智能发现 图元视觉AI智能平台让城市管理更智慧</td>\n",
       "      <td>周兴</td>\n",
       "      <td>中国建设信息化</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>46</td>\n",
       "      <td>快递100将推出包装碳排放大数据</td>\n",
       "      <td>NaN</td>\n",
       "      <td>中国包装</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>30.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>47</td>\n",
       "      <td>人工智能在图书馆的实施路径研究</td>\n",
       "      <td>侯志江</td>\n",
       "      <td>图书馆工作与研究</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>94.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>48</td>\n",
       "      <td>浅谈“AI+云”技术在现代农业的运用</td>\n",
       "      <td>杨永西; 罗岳文; 陈向生</td>\n",
       "      <td>现代化农业</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>39.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>49</td>\n",
       "      <td>基于大数据分析的在线教学有效性提升策略与实施路径</td>\n",
       "      <td>张汉萍</td>\n",
       "      <td>武汉职业技术学院学报</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>49.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>50</td>\n",
       "      <td>“互联网+人工智能”赋能下的国企党建创新</td>\n",
       "      <td>胡秀英</td>\n",
       "      <td>现代企业</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>17.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Unnamed: 0                                                 篇名  \\\n",
       "0            1                         大数据背景下档案信息服务体系构建方法探析  网络首发   \n",
       "1            2                                 人工智能背景下企业内部审计优化的探讨   \n",
       "2            3                       大数据背景下的农村精准扶贫调查报告——以辽宁省阜蒙县为例   \n",
       "3            4                               大数据时代背景下的汽车服务营销及策略研究   \n",
       "4            5                            基于金融大数据平台的用户数据生命周期管理的研究   \n",
       "5            6  平台视域下全国居民疫苗接种的认知、意愿及影响要素——基于五大互联网平台的舆情大数据分析  网络首发   \n",
       "6            7                       人工智能背景下我国人力资源服务业的SWOT分析及发展策略   \n",
       "7            8                                     大数据技术下金融审计管理研究   \n",
       "8            9                               人工智能时代高校思想政治教育的嬗变与重构   \n",
       "9           10                               时空大数据平台云计算及其典型应用服务探讨   \n",
       "10          11                              大数据时代下事业单位财务管理的创新路径探索   \n",
       "11          12                         基于人事信息系统的大型综合性医院“大人事”大数据管理   \n",
       "12          13                                 大数据时代高校思想政治教育模式的创新   \n",
       "13          14                               人工智能时代高校翻译技术实践环境建设研究   \n",
       "14          15                                   浅谈大数据在新能源汽车领域的应用   \n",
       "15          16               基于ElasticSearch全文检索的农业地理信息大数据平台设计与实现   \n",
       "16          17                               基于5G与AI的生态环境监测网络平台探讨   \n",
       "17          18                                      大数据背景下管理会计新探讨   \n",
       "18          19                                大数据时代会计信息化风险因素与防范对策   \n",
       "19          20                           基于大数据的煤矿综合业务管控平台开发  网络首发   \n",
       "20          21                           互联网大数据时代背景下大学生碎片化学习的策略研究   \n",
       "21          22                                    大数据融合分析应用的多角色探索   \n",
       "22          23                                   基于大数据的智慧消防系统架构探讨   \n",
       "23          24                            军队院校物联网工程专业大数据实验室建设方案研究   \n",
       "24          25                                  耳鼻喉科专病大数据平台的设计与实践   \n",
       "25          26                                 大数据背景的电子商务商品实体识别算法   \n",
       "26          27                           基于分布式数据库的大数据平台动态页面数据生成技术   \n",
       "27          28                    产教融合背景下高职院校大数据技术与应用专业人才培养的探索与实践   \n",
       "28          29                 人工智能视域下影音档案修复与传播路径探讨——以百年前北京影像修复为例   \n",
       "29          30                           一种用于大数据的改进的ItemBased推荐算法   \n",
       "30          31                                       铜仁市农业大数据发展探讨   \n",
       "31          32                             大数据视域下公安院校网络思想政治教育创新研究   \n",
       "32          33                               大数据背景下暴恐事件的特点及应对策略研究   \n",
       "33          34                             大数据时代高校信息化教学资源共享平台构建探析   \n",
       "34          35                            基于大数据的数字化电能计量误差分析  网络首发   \n",
       "35          36                                大数据背景下食用菌企业精准营销路径研究   \n",
       "36          37                              大数据背景下基层党组织建设的困境与解决路径   \n",
       "37          38                                   大数据智能审计助推企业高质量发展   \n",
       "38          39                      从人工路巡到智能发现 图元视觉AI智能平台让城市管理更智慧   \n",
       "39          40                                      人工智能驾驶技术商业化分析   \n",
       "40          41                              大数据背景下基层党组织建设的困境与解决路径   \n",
       "41          42                                      人工智能驾驶技术商业化分析   \n",
       "42          43                                   大数据智能审计助推企业高质量发展   \n",
       "43          44                                大数据背景下食用菌企业精准营销路径研究   \n",
       "44          45                      从人工路巡到智能发现 图元视觉AI智能平台让城市管理更智慧   \n",
       "45          46                                   快递100将推出包装碳排放大数据   \n",
       "46          47                                    人工智能在图书馆的实施路径研究   \n",
       "47          48                                 浅谈“AI+云”技术在现代农业的运用   \n",
       "48          49                           基于大数据分析的在线教学有效性提升策略与实施路径   \n",
       "49          50                               “互联网+人工智能”赋能下的国企党建创新   \n",
       "\n",
       "                       作者               刊名              发表时间  被引     下载  操作  \\\n",
       "0             孔媛媛; 张舒; 王爱            档案与建设  2021-07-02 09:42 NaN    NaN  下载   \n",
       "1                      雷琼   中小企业管理与科技(中旬刊)        2021-07-02 NaN    NaN  下载   \n",
       "2        赵嘉宁; 包吉; 孟唤婷; 刘旸   中小企业管理与科技(中旬刊)        2021-07-02 NaN    NaN  下载   \n",
       "3                     韦金孜   中小企业管理与科技(中旬刊)        2021-07-02 NaN    NaN  下载   \n",
       "4                 崔超; 马卉宇  齐齐哈尔大学学报(自然科学版)        2021-06-30 NaN   20.0  下载   \n",
       "5            喻国明; 杨雅; 陈雪娇              新闻界  2021-06-28 15:33 NaN  165.0  下载   \n",
       "6                     赵根良   中小企业管理与科技(下旬刊)        2021-06-28 NaN  171.0  下载   \n",
       "7                     黄友鑫   中小企业管理与科技(下旬刊)        2021-06-28 NaN   61.0  下载   \n",
       "8                      雷铮          高校辅导员学刊        2021-06-28 NaN    NaN  下载   \n",
       "9                沈凤娇; 余晓敏           地理空间信息        2021-06-28 NaN    NaN  下载   \n",
       "10                    廖小霞             财会学习        2021-06-25 NaN   32.0  下载   \n",
       "11      余淳;叶志宏;李章炳;白双;曾祥伦         中国循证医学杂志        2021-06-25 NaN   10.0  下载   \n",
       "12                     陈琪         洛阳师范学院学报        2021-06-25 NaN   37.0  下载   \n",
       "13               曹达钦; 戴钰涵              外语界        2021-06-25 NaN    NaN  下载   \n",
       "14                    杜志强        智能建筑与智慧城市        2021-06-24 NaN  228.0  下载   \n",
       "15      刘吉; 孙俊英; 陈忠超; 蔡忠亮        测绘与空间地理信息        2021-06-24 NaN   37.0  下载   \n",
       "16               徐爱兰; 耿建生        环境监测管理与技术        2021-06-22 NaN  186.0  下载   \n",
       "17                    林振刚   中小企业管理与科技(上旬刊)        2021-06-22 NaN  401.0  下载   \n",
       "18                     刘颖   中小企业管理与科技(上旬刊)        2021-06-22 NaN  196.0  下载   \n",
       "19      张波;赵耀忠;刘跃;田文明;赵晋松             热力发电  2021-06-21 17:09 NaN   76.0  下载   \n",
       "20                王妍; 王婉婷            中国信息化        2021-06-20 NaN  216.0  下载   \n",
       "21                     陈静            中国信息化        2021-06-20 NaN  133.0  下载   \n",
       "22  吴先洪; 车辉; 杨波; 邢慧芬; 樊玉琦            物联网技术        2021-06-20 NaN   49.0  下载   \n",
       "23      刘国松; 万平; 李先利; 吴书金            物联网技术        2021-06-20 NaN   14.0  下载   \n",
       "24      戴静娟; 胡萍; 陈名扬; 黄晓琴       中国卫生信息管理杂志        2021-06-20 NaN   17.0  下载   \n",
       "25                    王玉玲           微型电脑应用        2021-06-20 NaN   61.0  下载   \n",
       "26                    苏莉娜           微型电脑应用        2021-06-20 NaN   34.0  下载   \n",
       "27                    王雪松              科技风        2021-06-20 NaN   53.0  下载   \n",
       "28                    周子晴             北京档案        2021-06-20 NaN    7.0  下载   \n",
       "29                李洋; 黄树成         计算机与数字工程        2021-06-20 NaN    1.0  下载   \n",
       "30       张军; 黄义君; 王海军; 万伟           现代农业科技        2021-06-20 NaN    NaN  下载   \n",
       "31                    赵志强         福建警察学院学报        2021-06-20 NaN    NaN  下载   \n",
       "32           綦凤; 杨金召; 涂瀚文     河北公安警察职业学院学报        2021-06-18 NaN   27.0  下载   \n",
       "33                     魏妍        电脑编程技巧与维护        2021-06-18 NaN    4.0  下载   \n",
       "34         田园; 张梅; 保富; 原野            电测与仪表  2021-06-17 10:38 NaN   69.0  下载   \n",
       "35                    范文婷             北方经贸        2021-06-15 NaN   92.0  下载   \n",
       "36                     周青   中小企业管理与科技(中旬刊)        2021-06-15 NaN   49.0  下载   \n",
       "37     楚文光; 胡为民; 雷年桢; 陈赛霞           中国内部审计        2021-06-15 NaN  198.0  下载   \n",
       "38                     周兴          中国建设信息化        2021-06-15 NaN    9.0  下载   \n",
       "39                    孙吴炎            办公自动化        2021-06-15 NaN   82.0  下载   \n",
       "40                     周青   中小企业管理与科技(中旬刊)        2021-06-15 NaN   49.0  下载   \n",
       "41                    孙吴炎            办公自动化        2021-06-15 NaN   82.0  下载   \n",
       "42     楚文光; 胡为民; 雷年桢; 陈赛霞           中国内部审计        2021-06-15 NaN  198.0  下载   \n",
       "43                    范文婷             北方经贸        2021-06-15 NaN   92.0  下载   \n",
       "44                     周兴          中国建设信息化        2021-06-15 NaN    9.0  下载   \n",
       "45                    NaN             中国包装        2021-06-15 NaN   30.0  下载   \n",
       "46                    侯志江         图书馆工作与研究        2021-06-15 NaN   94.0  下载   \n",
       "47          杨永西; 罗岳文; 陈向生            现代化农业        2021-06-15 NaN   39.0  下载   \n",
       "48                    张汉萍       武汉职业技术学院学报        2021-06-15 NaN   49.0  下载   \n",
       "49                    胡秀英             现代企业        2021-06-15 NaN   17.0  下载   \n",
       "\n",
       "                                                详细页链接  \n",
       "0   https://kns.cnki.net/kcms/detail/detail.aspx?d...  \n",
       "1                                                 NaN  \n",
       "2                                                 NaN  \n",
       "3                                                 NaN  \n",
       "4                                                 NaN  \n",
       "5                                                 NaN  \n",
       "6                                                 NaN  \n",
       "7                                                 NaN  \n",
       "8                                                 NaN  \n",
       "9                                                 NaN  \n",
       "10                                                NaN  \n",
       "11                                                NaN  \n",
       "12                                                NaN  \n",
       "13                                                NaN  \n",
       "14                                                NaN  \n",
       "15                                                NaN  \n",
       "16                                                NaN  \n",
       "17                                                NaN  \n",
       "18                                                NaN  \n",
       "19                                                NaN  \n",
       "20                                                NaN  \n",
       "21                                                NaN  \n",
       "22                                                NaN  \n",
       "23                                                NaN  \n",
       "24                                                NaN  \n",
       "25                                                NaN  \n",
       "26                                                NaN  \n",
       "27                                                NaN  \n",
       "28                                                NaN  \n",
       "29                                                NaN  \n",
       "30                                                NaN  \n",
       "31                                                NaN  \n",
       "32                                                NaN  \n",
       "33                                                NaN  \n",
       "34                                                NaN  \n",
       "35                                                NaN  \n",
       "36                                                NaN  \n",
       "37                                                NaN  \n",
       "38                                                NaN  \n",
       "39                                                NaN  \n",
       "40                                                NaN  \n",
       "41                                                NaN  \n",
       "42                                                NaN  \n",
       "43                                                NaN  \n",
       "44                                                NaN  \n",
       "45                                                NaN  \n",
       "46                                                NaN  \n",
       "47                                                NaN  \n",
       "48                                                NaN  \n",
       "49                                                NaN  "
      ]
     },
     "execution_count": 291,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "links=[]\n",
    "\n",
    "for link in href:\n",
    "    link=correct(link)\n",
    "    links.append(link)\n",
    "    df_links=pd.DataFrame({'详细页链接':links})\n",
    "    df_首页=pd.concat([pd.read_html(page_html)[0],df_links],axis=1)\n",
    "\n",
    "df_首页"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 292,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 获取详细页面中的PDF下载链接\n",
    "from requests_html import HTMLSession\n",
    "import urllib.parse\n",
    "\n",
    "session = HTMLSession()\n",
    "\n",
    "pdf_links=[]\n",
    "\n",
    "for url in links:\n",
    "    r = session.get(url)\n",
    "    pdf_link_xpath=r.html.xpath('//*[@id=\"pdfDown\"]/@href')[1]\n",
    "    pdf_links.append(pdf_link_xpath)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 293,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['/kcms/download.aspx?filename=klFaXt2a4cjM5pWdzh1RSBHTNZkMwAVbyJWcTRER2d2Rl5kVlhzY18WeVV1ck1GNzB1K0MEVPp1SXBnS4ckYERnYFNGOCtyVxRlNvkkMohTQ1sSZWdmb3hmMoVGSzhHe00UVqpkQrN3M4I3V4dkNyNncVFmetZzS&tablename=CAPJLAST&dflag=pdfdown']"
      ]
     },
     "execution_count": 293,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pdf_links"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 294,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['https://kns.cnki.net/kcms/download.aspx?filename=klFaXt2a4cjM5pWdzh1RSBHTNZkMwAVbyJWcTRER2d2Rl5kVlhzY18WeVV1ck1GNzB1K0MEVPp1SXBnS4ckYERnYFNGOCtyVxRlNvkkMohTQ1sSZWdmb3hmMoVGSzhHe00UVqpkQrN3M4I3V4dkNyNncVFmetZzS&tablename=CAPJLAST&dflag=pdfdown']"
      ]
     },
     "execution_count": 294,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 修复正确的PDF下载链接\n",
    "pdf_href=['https://kns.cnki.net'+link for link in pdf_links]\n",
    "pdf_href"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 295,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>篇名</th>\n",
       "      <th>作者</th>\n",
       "      <th>刊名</th>\n",
       "      <th>发表时间</th>\n",
       "      <th>被引</th>\n",
       "      <th>下载</th>\n",
       "      <th>操作</th>\n",
       "      <th>详细页链接</th>\n",
       "      <th>PDF下载链接</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>大数据背景下档案信息服务体系构建方法探析  网络首发</td>\n",
       "      <td>孔媛媛; 张舒; 王爱</td>\n",
       "      <td>档案与建设</td>\n",
       "      <td>2021-07-02 09:42</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>下载</td>\n",
       "      <td>https://kns.cnki.net/kcms/detail/detail.aspx?d...</td>\n",
       "      <td>https://kns.cnki.net/kcms/download.aspx?filena...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>人工智能背景下企业内部审计优化的探讨</td>\n",
       "      <td>雷琼</td>\n",
       "      <td>中小企业管理与科技(中旬刊)</td>\n",
       "      <td>2021-07-02</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>大数据背景下的农村精准扶贫调查报告——以辽宁省阜蒙县为例</td>\n",
       "      <td>赵嘉宁; 包吉; 孟唤婷; 刘旸</td>\n",
       "      <td>中小企业管理与科技(中旬刊)</td>\n",
       "      <td>2021-07-02</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>大数据时代背景下的汽车服务营销及策略研究</td>\n",
       "      <td>韦金孜</td>\n",
       "      <td>中小企业管理与科技(中旬刊)</td>\n",
       "      <td>2021-07-02</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>基于金融大数据平台的用户数据生命周期管理的研究</td>\n",
       "      <td>崔超; 马卉宇</td>\n",
       "      <td>齐齐哈尔大学学报(自然科学版)</td>\n",
       "      <td>2021-06-30</td>\n",
       "      <td>NaN</td>\n",
       "      <td>20.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>平台视域下全国居民疫苗接种的认知、意愿及影响要素——基于五大互联网平台的舆情大数据分析  网络首发</td>\n",
       "      <td>喻国明; 杨雅; 陈雪娇</td>\n",
       "      <td>新闻界</td>\n",
       "      <td>2021-06-28 15:33</td>\n",
       "      <td>NaN</td>\n",
       "      <td>165.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>7</td>\n",
       "      <td>人工智能背景下我国人力资源服务业的SWOT分析及发展策略</td>\n",
       "      <td>赵根良</td>\n",
       "      <td>中小企业管理与科技(下旬刊)</td>\n",
       "      <td>2021-06-28</td>\n",
       "      <td>NaN</td>\n",
       "      <td>171.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>8</td>\n",
       "      <td>大数据技术下金融审计管理研究</td>\n",
       "      <td>黄友鑫</td>\n",
       "      <td>中小企业管理与科技(下旬刊)</td>\n",
       "      <td>2021-06-28</td>\n",
       "      <td>NaN</td>\n",
       "      <td>61.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>9</td>\n",
       "      <td>人工智能时代高校思想政治教育的嬗变与重构</td>\n",
       "      <td>雷铮</td>\n",
       "      <td>高校辅导员学刊</td>\n",
       "      <td>2021-06-28</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>10</td>\n",
       "      <td>时空大数据平台云计算及其典型应用服务探讨</td>\n",
       "      <td>沈凤娇; 余晓敏</td>\n",
       "      <td>地理空间信息</td>\n",
       "      <td>2021-06-28</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>11</td>\n",
       "      <td>大数据时代下事业单位财务管理的创新路径探索</td>\n",
       "      <td>廖小霞</td>\n",
       "      <td>财会学习</td>\n",
       "      <td>2021-06-25</td>\n",
       "      <td>NaN</td>\n",
       "      <td>32.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>12</td>\n",
       "      <td>基于人事信息系统的大型综合性医院“大人事”大数据管理</td>\n",
       "      <td>余淳;叶志宏;李章炳;白双;曾祥伦</td>\n",
       "      <td>中国循证医学杂志</td>\n",
       "      <td>2021-06-25</td>\n",
       "      <td>NaN</td>\n",
       "      <td>10.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>13</td>\n",
       "      <td>大数据时代高校思想政治教育模式的创新</td>\n",
       "      <td>陈琪</td>\n",
       "      <td>洛阳师范学院学报</td>\n",
       "      <td>2021-06-25</td>\n",
       "      <td>NaN</td>\n",
       "      <td>37.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>14</td>\n",
       "      <td>人工智能时代高校翻译技术实践环境建设研究</td>\n",
       "      <td>曹达钦; 戴钰涵</td>\n",
       "      <td>外语界</td>\n",
       "      <td>2021-06-25</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>15</td>\n",
       "      <td>浅谈大数据在新能源汽车领域的应用</td>\n",
       "      <td>杜志强</td>\n",
       "      <td>智能建筑与智慧城市</td>\n",
       "      <td>2021-06-24</td>\n",
       "      <td>NaN</td>\n",
       "      <td>228.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>16</td>\n",
       "      <td>基于ElasticSearch全文检索的农业地理信息大数据平台设计与实现</td>\n",
       "      <td>刘吉; 孙俊英; 陈忠超; 蔡忠亮</td>\n",
       "      <td>测绘与空间地理信息</td>\n",
       "      <td>2021-06-24</td>\n",
       "      <td>NaN</td>\n",
       "      <td>37.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>17</td>\n",
       "      <td>基于5G与AI的生态环境监测网络平台探讨</td>\n",
       "      <td>徐爱兰; 耿建生</td>\n",
       "      <td>环境监测管理与技术</td>\n",
       "      <td>2021-06-22</td>\n",
       "      <td>NaN</td>\n",
       "      <td>186.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>18</td>\n",
       "      <td>大数据背景下管理会计新探讨</td>\n",
       "      <td>林振刚</td>\n",
       "      <td>中小企业管理与科技(上旬刊)</td>\n",
       "      <td>2021-06-22</td>\n",
       "      <td>NaN</td>\n",
       "      <td>401.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>19</td>\n",
       "      <td>大数据时代会计信息化风险因素与防范对策</td>\n",
       "      <td>刘颖</td>\n",
       "      <td>中小企业管理与科技(上旬刊)</td>\n",
       "      <td>2021-06-22</td>\n",
       "      <td>NaN</td>\n",
       "      <td>196.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>20</td>\n",
       "      <td>基于大数据的煤矿综合业务管控平台开发  网络首发</td>\n",
       "      <td>张波;赵耀忠;刘跃;田文明;赵晋松</td>\n",
       "      <td>热力发电</td>\n",
       "      <td>2021-06-21 17:09</td>\n",
       "      <td>NaN</td>\n",
       "      <td>76.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>21</td>\n",
       "      <td>互联网大数据时代背景下大学生碎片化学习的策略研究</td>\n",
       "      <td>王妍; 王婉婷</td>\n",
       "      <td>中国信息化</td>\n",
       "      <td>2021-06-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>216.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>22</td>\n",
       "      <td>大数据融合分析应用的多角色探索</td>\n",
       "      <td>陈静</td>\n",
       "      <td>中国信息化</td>\n",
       "      <td>2021-06-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>133.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>23</td>\n",
       "      <td>基于大数据的智慧消防系统架构探讨</td>\n",
       "      <td>吴先洪; 车辉; 杨波; 邢慧芬; 樊玉琦</td>\n",
       "      <td>物联网技术</td>\n",
       "      <td>2021-06-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>49.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>24</td>\n",
       "      <td>军队院校物联网工程专业大数据实验室建设方案研究</td>\n",
       "      <td>刘国松; 万平; 李先利; 吴书金</td>\n",
       "      <td>物联网技术</td>\n",
       "      <td>2021-06-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>14.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>25</td>\n",
       "      <td>耳鼻喉科专病大数据平台的设计与实践</td>\n",
       "      <td>戴静娟; 胡萍; 陈名扬; 黄晓琴</td>\n",
       "      <td>中国卫生信息管理杂志</td>\n",
       "      <td>2021-06-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>17.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>26</td>\n",
       "      <td>大数据背景的电子商务商品实体识别算法</td>\n",
       "      <td>王玉玲</td>\n",
       "      <td>微型电脑应用</td>\n",
       "      <td>2021-06-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>61.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>27</td>\n",
       "      <td>基于分布式数据库的大数据平台动态页面数据生成技术</td>\n",
       "      <td>苏莉娜</td>\n",
       "      <td>微型电脑应用</td>\n",
       "      <td>2021-06-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>34.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>28</td>\n",
       "      <td>产教融合背景下高职院校大数据技术与应用专业人才培养的探索与实践</td>\n",
       "      <td>王雪松</td>\n",
       "      <td>科技风</td>\n",
       "      <td>2021-06-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>53.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>29</td>\n",
       "      <td>人工智能视域下影音档案修复与传播路径探讨——以百年前北京影像修复为例</td>\n",
       "      <td>周子晴</td>\n",
       "      <td>北京档案</td>\n",
       "      <td>2021-06-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>7.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>30</td>\n",
       "      <td>一种用于大数据的改进的ItemBased推荐算法</td>\n",
       "      <td>李洋; 黄树成</td>\n",
       "      <td>计算机与数字工程</td>\n",
       "      <td>2021-06-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>31</td>\n",
       "      <td>铜仁市农业大数据发展探讨</td>\n",
       "      <td>张军; 黄义君; 王海军; 万伟</td>\n",
       "      <td>现代农业科技</td>\n",
       "      <td>2021-06-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>32</td>\n",
       "      <td>大数据视域下公安院校网络思想政治教育创新研究</td>\n",
       "      <td>赵志强</td>\n",
       "      <td>福建警察学院学报</td>\n",
       "      <td>2021-06-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>33</td>\n",
       "      <td>大数据背景下暴恐事件的特点及应对策略研究</td>\n",
       "      <td>綦凤; 杨金召; 涂瀚文</td>\n",
       "      <td>河北公安警察职业学院学报</td>\n",
       "      <td>2021-06-18</td>\n",
       "      <td>NaN</td>\n",
       "      <td>27.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>34</td>\n",
       "      <td>大数据时代高校信息化教学资源共享平台构建探析</td>\n",
       "      <td>魏妍</td>\n",
       "      <td>电脑编程技巧与维护</td>\n",
       "      <td>2021-06-18</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>35</td>\n",
       "      <td>基于大数据的数字化电能计量误差分析  网络首发</td>\n",
       "      <td>田园; 张梅; 保富; 原野</td>\n",
       "      <td>电测与仪表</td>\n",
       "      <td>2021-06-17 10:38</td>\n",
       "      <td>NaN</td>\n",
       "      <td>69.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>36</td>\n",
       "      <td>大数据背景下食用菌企业精准营销路径研究</td>\n",
       "      <td>范文婷</td>\n",
       "      <td>北方经贸</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>92.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>37</td>\n",
       "      <td>大数据背景下基层党组织建设的困境与解决路径</td>\n",
       "      <td>周青</td>\n",
       "      <td>中小企业管理与科技(中旬刊)</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>49.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>38</td>\n",
       "      <td>大数据智能审计助推企业高质量发展</td>\n",
       "      <td>楚文光; 胡为民; 雷年桢; 陈赛霞</td>\n",
       "      <td>中国内部审计</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>198.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>39</td>\n",
       "      <td>从人工路巡到智能发现 图元视觉AI智能平台让城市管理更智慧</td>\n",
       "      <td>周兴</td>\n",
       "      <td>中国建设信息化</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>40</td>\n",
       "      <td>人工智能驾驶技术商业化分析</td>\n",
       "      <td>孙吴炎</td>\n",
       "      <td>办公自动化</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>82.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>41</td>\n",
       "      <td>大数据背景下基层党组织建设的困境与解决路径</td>\n",
       "      <td>周青</td>\n",
       "      <td>中小企业管理与科技(中旬刊)</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>49.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>42</td>\n",
       "      <td>人工智能驾驶技术商业化分析</td>\n",
       "      <td>孙吴炎</td>\n",
       "      <td>办公自动化</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>82.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>43</td>\n",
       "      <td>大数据智能审计助推企业高质量发展</td>\n",
       "      <td>楚文光; 胡为民; 雷年桢; 陈赛霞</td>\n",
       "      <td>中国内部审计</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>198.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>44</td>\n",
       "      <td>大数据背景下食用菌企业精准营销路径研究</td>\n",
       "      <td>范文婷</td>\n",
       "      <td>北方经贸</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>92.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>45</td>\n",
       "      <td>从人工路巡到智能发现 图元视觉AI智能平台让城市管理更智慧</td>\n",
       "      <td>周兴</td>\n",
       "      <td>中国建设信息化</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>46</td>\n",
       "      <td>快递100将推出包装碳排放大数据</td>\n",
       "      <td>NaN</td>\n",
       "      <td>中国包装</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>30.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>47</td>\n",
       "      <td>人工智能在图书馆的实施路径研究</td>\n",
       "      <td>侯志江</td>\n",
       "      <td>图书馆工作与研究</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>94.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>48</td>\n",
       "      <td>浅谈“AI+云”技术在现代农业的运用</td>\n",
       "      <td>杨永西; 罗岳文; 陈向生</td>\n",
       "      <td>现代化农业</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>39.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>49</td>\n",
       "      <td>基于大数据分析的在线教学有效性提升策略与实施路径</td>\n",
       "      <td>张汉萍</td>\n",
       "      <td>武汉职业技术学院学报</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>49.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>50</td>\n",
       "      <td>“互联网+人工智能”赋能下的国企党建创新</td>\n",
       "      <td>胡秀英</td>\n",
       "      <td>现代企业</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>17.0</td>\n",
       "      <td>下载</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Unnamed: 0                                                 篇名  \\\n",
       "0            1                         大数据背景下档案信息服务体系构建方法探析  网络首发   \n",
       "1            2                                 人工智能背景下企业内部审计优化的探讨   \n",
       "2            3                       大数据背景下的农村精准扶贫调查报告——以辽宁省阜蒙县为例   \n",
       "3            4                               大数据时代背景下的汽车服务营销及策略研究   \n",
       "4            5                            基于金融大数据平台的用户数据生命周期管理的研究   \n",
       "5            6  平台视域下全国居民疫苗接种的认知、意愿及影响要素——基于五大互联网平台的舆情大数据分析  网络首发   \n",
       "6            7                       人工智能背景下我国人力资源服务业的SWOT分析及发展策略   \n",
       "7            8                                     大数据技术下金融审计管理研究   \n",
       "8            9                               人工智能时代高校思想政治教育的嬗变与重构   \n",
       "9           10                               时空大数据平台云计算及其典型应用服务探讨   \n",
       "10          11                              大数据时代下事业单位财务管理的创新路径探索   \n",
       "11          12                         基于人事信息系统的大型综合性医院“大人事”大数据管理   \n",
       "12          13                                 大数据时代高校思想政治教育模式的创新   \n",
       "13          14                               人工智能时代高校翻译技术实践环境建设研究   \n",
       "14          15                                   浅谈大数据在新能源汽车领域的应用   \n",
       "15          16               基于ElasticSearch全文检索的农业地理信息大数据平台设计与实现   \n",
       "16          17                               基于5G与AI的生态环境监测网络平台探讨   \n",
       "17          18                                      大数据背景下管理会计新探讨   \n",
       "18          19                                大数据时代会计信息化风险因素与防范对策   \n",
       "19          20                           基于大数据的煤矿综合业务管控平台开发  网络首发   \n",
       "20          21                           互联网大数据时代背景下大学生碎片化学习的策略研究   \n",
       "21          22                                    大数据融合分析应用的多角色探索   \n",
       "22          23                                   基于大数据的智慧消防系统架构探讨   \n",
       "23          24                            军队院校物联网工程专业大数据实验室建设方案研究   \n",
       "24          25                                  耳鼻喉科专病大数据平台的设计与实践   \n",
       "25          26                                 大数据背景的电子商务商品实体识别算法   \n",
       "26          27                           基于分布式数据库的大数据平台动态页面数据生成技术   \n",
       "27          28                    产教融合背景下高职院校大数据技术与应用专业人才培养的探索与实践   \n",
       "28          29                 人工智能视域下影音档案修复与传播路径探讨——以百年前北京影像修复为例   \n",
       "29          30                           一种用于大数据的改进的ItemBased推荐算法   \n",
       "30          31                                       铜仁市农业大数据发展探讨   \n",
       "31          32                             大数据视域下公安院校网络思想政治教育创新研究   \n",
       "32          33                               大数据背景下暴恐事件的特点及应对策略研究   \n",
       "33          34                             大数据时代高校信息化教学资源共享平台构建探析   \n",
       "34          35                            基于大数据的数字化电能计量误差分析  网络首发   \n",
       "35          36                                大数据背景下食用菌企业精准营销路径研究   \n",
       "36          37                              大数据背景下基层党组织建设的困境与解决路径   \n",
       "37          38                                   大数据智能审计助推企业高质量发展   \n",
       "38          39                      从人工路巡到智能发现 图元视觉AI智能平台让城市管理更智慧   \n",
       "39          40                                      人工智能驾驶技术商业化分析   \n",
       "40          41                              大数据背景下基层党组织建设的困境与解决路径   \n",
       "41          42                                      人工智能驾驶技术商业化分析   \n",
       "42          43                                   大数据智能审计助推企业高质量发展   \n",
       "43          44                                大数据背景下食用菌企业精准营销路径研究   \n",
       "44          45                      从人工路巡到智能发现 图元视觉AI智能平台让城市管理更智慧   \n",
       "45          46                                   快递100将推出包装碳排放大数据   \n",
       "46          47                                    人工智能在图书馆的实施路径研究   \n",
       "47          48                                 浅谈“AI+云”技术在现代农业的运用   \n",
       "48          49                           基于大数据分析的在线教学有效性提升策略与实施路径   \n",
       "49          50                               “互联网+人工智能”赋能下的国企党建创新   \n",
       "\n",
       "                       作者               刊名              发表时间  被引     下载  操作  \\\n",
       "0             孔媛媛; 张舒; 王爱            档案与建设  2021-07-02 09:42 NaN    NaN  下载   \n",
       "1                      雷琼   中小企业管理与科技(中旬刊)        2021-07-02 NaN    NaN  下载   \n",
       "2        赵嘉宁; 包吉; 孟唤婷; 刘旸   中小企业管理与科技(中旬刊)        2021-07-02 NaN    NaN  下载   \n",
       "3                     韦金孜   中小企业管理与科技(中旬刊)        2021-07-02 NaN    NaN  下载   \n",
       "4                 崔超; 马卉宇  齐齐哈尔大学学报(自然科学版)        2021-06-30 NaN   20.0  下载   \n",
       "5            喻国明; 杨雅; 陈雪娇              新闻界  2021-06-28 15:33 NaN  165.0  下载   \n",
       "6                     赵根良   中小企业管理与科技(下旬刊)        2021-06-28 NaN  171.0  下载   \n",
       "7                     黄友鑫   中小企业管理与科技(下旬刊)        2021-06-28 NaN   61.0  下载   \n",
       "8                      雷铮          高校辅导员学刊        2021-06-28 NaN    NaN  下载   \n",
       "9                沈凤娇; 余晓敏           地理空间信息        2021-06-28 NaN    NaN  下载   \n",
       "10                    廖小霞             财会学习        2021-06-25 NaN   32.0  下载   \n",
       "11      余淳;叶志宏;李章炳;白双;曾祥伦         中国循证医学杂志        2021-06-25 NaN   10.0  下载   \n",
       "12                     陈琪         洛阳师范学院学报        2021-06-25 NaN   37.0  下载   \n",
       "13               曹达钦; 戴钰涵              外语界        2021-06-25 NaN    NaN  下载   \n",
       "14                    杜志强        智能建筑与智慧城市        2021-06-24 NaN  228.0  下载   \n",
       "15      刘吉; 孙俊英; 陈忠超; 蔡忠亮        测绘与空间地理信息        2021-06-24 NaN   37.0  下载   \n",
       "16               徐爱兰; 耿建生        环境监测管理与技术        2021-06-22 NaN  186.0  下载   \n",
       "17                    林振刚   中小企业管理与科技(上旬刊)        2021-06-22 NaN  401.0  下载   \n",
       "18                     刘颖   中小企业管理与科技(上旬刊)        2021-06-22 NaN  196.0  下载   \n",
       "19      张波;赵耀忠;刘跃;田文明;赵晋松             热力发电  2021-06-21 17:09 NaN   76.0  下载   \n",
       "20                王妍; 王婉婷            中国信息化        2021-06-20 NaN  216.0  下载   \n",
       "21                     陈静            中国信息化        2021-06-20 NaN  133.0  下载   \n",
       "22  吴先洪; 车辉; 杨波; 邢慧芬; 樊玉琦            物联网技术        2021-06-20 NaN   49.0  下载   \n",
       "23      刘国松; 万平; 李先利; 吴书金            物联网技术        2021-06-20 NaN   14.0  下载   \n",
       "24      戴静娟; 胡萍; 陈名扬; 黄晓琴       中国卫生信息管理杂志        2021-06-20 NaN   17.0  下载   \n",
       "25                    王玉玲           微型电脑应用        2021-06-20 NaN   61.0  下载   \n",
       "26                    苏莉娜           微型电脑应用        2021-06-20 NaN   34.0  下载   \n",
       "27                    王雪松              科技风        2021-06-20 NaN   53.0  下载   \n",
       "28                    周子晴             北京档案        2021-06-20 NaN    7.0  下载   \n",
       "29                李洋; 黄树成         计算机与数字工程        2021-06-20 NaN    1.0  下载   \n",
       "30       张军; 黄义君; 王海军; 万伟           现代农业科技        2021-06-20 NaN    NaN  下载   \n",
       "31                    赵志强         福建警察学院学报        2021-06-20 NaN    NaN  下载   \n",
       "32           綦凤; 杨金召; 涂瀚文     河北公安警察职业学院学报        2021-06-18 NaN   27.0  下载   \n",
       "33                     魏妍        电脑编程技巧与维护        2021-06-18 NaN    4.0  下载   \n",
       "34         田园; 张梅; 保富; 原野            电测与仪表  2021-06-17 10:38 NaN   69.0  下载   \n",
       "35                    范文婷             北方经贸        2021-06-15 NaN   92.0  下载   \n",
       "36                     周青   中小企业管理与科技(中旬刊)        2021-06-15 NaN   49.0  下载   \n",
       "37     楚文光; 胡为民; 雷年桢; 陈赛霞           中国内部审计        2021-06-15 NaN  198.0  下载   \n",
       "38                     周兴          中国建设信息化        2021-06-15 NaN    9.0  下载   \n",
       "39                    孙吴炎            办公自动化        2021-06-15 NaN   82.0  下载   \n",
       "40                     周青   中小企业管理与科技(中旬刊)        2021-06-15 NaN   49.0  下载   \n",
       "41                    孙吴炎            办公自动化        2021-06-15 NaN   82.0  下载   \n",
       "42     楚文光; 胡为民; 雷年桢; 陈赛霞           中国内部审计        2021-06-15 NaN  198.0  下载   \n",
       "43                    范文婷             北方经贸        2021-06-15 NaN   92.0  下载   \n",
       "44                     周兴          中国建设信息化        2021-06-15 NaN    9.0  下载   \n",
       "45                    NaN             中国包装        2021-06-15 NaN   30.0  下载   \n",
       "46                    侯志江         图书馆工作与研究        2021-06-15 NaN   94.0  下载   \n",
       "47          杨永西; 罗岳文; 陈向生            现代化农业        2021-06-15 NaN   39.0  下载   \n",
       "48                    张汉萍       武汉职业技术学院学报        2021-06-15 NaN   49.0  下载   \n",
       "49                    胡秀英             现代企业        2021-06-15 NaN   17.0  下载   \n",
       "\n",
       "                                                详细页链接  \\\n",
       "0   https://kns.cnki.net/kcms/detail/detail.aspx?d...   \n",
       "1                                                 NaN   \n",
       "2                                                 NaN   \n",
       "3                                                 NaN   \n",
       "4                                                 NaN   \n",
       "5                                                 NaN   \n",
       "6                                                 NaN   \n",
       "7                                                 NaN   \n",
       "8                                                 NaN   \n",
       "9                                                 NaN   \n",
       "10                                                NaN   \n",
       "11                                                NaN   \n",
       "12                                                NaN   \n",
       "13                                                NaN   \n",
       "14                                                NaN   \n",
       "15                                                NaN   \n",
       "16                                                NaN   \n",
       "17                                                NaN   \n",
       "18                                                NaN   \n",
       "19                                                NaN   \n",
       "20                                                NaN   \n",
       "21                                                NaN   \n",
       "22                                                NaN   \n",
       "23                                                NaN   \n",
       "24                                                NaN   \n",
       "25                                                NaN   \n",
       "26                                                NaN   \n",
       "27                                                NaN   \n",
       "28                                                NaN   \n",
       "29                                                NaN   \n",
       "30                                                NaN   \n",
       "31                                                NaN   \n",
       "32                                                NaN   \n",
       "33                                                NaN   \n",
       "34                                                NaN   \n",
       "35                                                NaN   \n",
       "36                                                NaN   \n",
       "37                                                NaN   \n",
       "38                                                NaN   \n",
       "39                                                NaN   \n",
       "40                                                NaN   \n",
       "41                                                NaN   \n",
       "42                                                NaN   \n",
       "43                                                NaN   \n",
       "44                                                NaN   \n",
       "45                                                NaN   \n",
       "46                                                NaN   \n",
       "47                                                NaN   \n",
       "48                                                NaN   \n",
       "49                                                NaN   \n",
       "\n",
       "                                              PDF下载链接  \n",
       "0   https://kns.cnki.net/kcms/download.aspx?filena...  \n",
       "1                                                 NaN  \n",
       "2                                                 NaN  \n",
       "3                                                 NaN  \n",
       "4                                                 NaN  \n",
       "5                                                 NaN  \n",
       "6                                                 NaN  \n",
       "7                                                 NaN  \n",
       "8                                                 NaN  \n",
       "9                                                 NaN  \n",
       "10                                                NaN  \n",
       "11                                                NaN  \n",
       "12                                                NaN  \n",
       "13                                                NaN  \n",
       "14                                                NaN  \n",
       "15                                                NaN  \n",
       "16                                                NaN  \n",
       "17                                                NaN  \n",
       "18                                                NaN  \n",
       "19                                                NaN  \n",
       "20                                                NaN  \n",
       "21                                                NaN  \n",
       "22                                                NaN  \n",
       "23                                                NaN  \n",
       "24                                                NaN  \n",
       "25                                                NaN  \n",
       "26                                                NaN  \n",
       "27                                                NaN  \n",
       "28                                                NaN  \n",
       "29                                                NaN  \n",
       "30                                                NaN  \n",
       "31                                                NaN  \n",
       "32                                                NaN  \n",
       "33                                                NaN  \n",
       "34                                                NaN  \n",
       "35                                                NaN  \n",
       "36                                                NaN  \n",
       "37                                                NaN  \n",
       "38                                                NaN  \n",
       "39                                                NaN  \n",
       "40                                                NaN  \n",
       "41                                                NaN  \n",
       "42                                                NaN  \n",
       "43                                                NaN  \n",
       "44                                                NaN  \n",
       "45                                                NaN  \n",
       "46                                                NaN  \n",
       "47                                                NaN  \n",
       "48                                                NaN  \n",
       "49                                                NaN  "
      ]
     },
     "execution_count": 295,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_pdf_links=pd.DataFrame({'PDF下载链接':pdf_href})\n",
    "df_首页=pd.concat([df_首页,df_pdf_links],axis=1)\n",
    "df_首页"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 翻页以获取更多信息"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 296,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'下一页'"
      ]
     },
     "execution_count": 296,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "element = driver.find_element_by_id('PageNext')\n",
    "element.get_attribute('innerHTML')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 297,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'1/120'"
      ]
     },
     "execution_count": 297,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 获取跳转上限\n",
    "element = driver.find_element_by_xpath('//span[@class=\"countPageMark\"]')\n",
    "page_str = element.get_attribute('innerHTML')\n",
    "page_str "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 298,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['1', '120']"
      ]
     },
     "execution_count": 298,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "page_int = page_str.split('/')\n",
    "page_int"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 299,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120]\n"
     ]
    }
   ],
   "source": [
    "pages = list(range(1,int(page_int[1])+1))\n",
    "print(pages)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 300,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 共45页，本次仅爬取前20页\n",
    "pages = list(range(1,21))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 301,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 函数（翻页）\n",
    "root_group=[]\n",
    "\n",
    "表格_html = dict()\n",
    "main_content =\"\"\n",
    "element = None\n",
    "\n",
    "def page_process(pages):\n",
    "    \"\"\"CNKI翻页获取信息\"\"\"\n",
    "    for p in pages:\n",
    "        print(p,end='\\t')\n",
    "        driver.find_element_by_id('PageNext').click()\n",
    "        time.sleep(15)\n",
    "        \n",
    "        html=driver.page_source\n",
    "        root=fromstring(html)\n",
    "        root_group.append(root)\n",
    "        \n",
    "        element = driver.find_element_by_id('gridTable')\n",
    "        main_content = element.get_attribute('innerHTML')\n",
    "        表格_html[p] = main_content"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 302,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1\t2\t3\t4\t5\t6\t7\t8\t9\t10\t11\t12\t13\t14\t15\t16\t17\t18\t19\t20\t"
     ]
    }
   ],
   "source": [
    "page_process(pages)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 303,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>html_snippets</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                        html_snippets\n",
       "1   \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "2   \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "3   \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "4   \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "5   \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "6   \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "7   \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "8   \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "9   \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "10  \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "11  \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "12  \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "13  \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "14  \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "15  \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "16  \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "17  \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "18  \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "19  \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "20  \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ..."
      ]
     },
     "execution_count": 303,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame([表格_html]).T\n",
    "df.columns = [\"html_snippets\"]\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 304,
   "metadata": {},
   "outputs": [],
   "source": [
    "l_df = []\n",
    "for p in pages:\n",
    "    表格 = pd.read_html(表格_html[p])[0]\n",
    "    l_df.append(表格)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 305,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>篇名</th>\n",
       "      <th>作者</th>\n",
       "      <th>刊名</th>\n",
       "      <th>发表时间</th>\n",
       "      <th>被引</th>\n",
       "      <th>下载</th>\n",
       "      <th>操作</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>51</td>\n",
       "      <td>新工科背景下电子通信专业“人工智能技术基础”课程改革</td>\n",
       "      <td>张珂; 戚银城; 孔英会; 赵振兵</td>\n",
       "      <td>电气电子教学学报</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>30.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>52</td>\n",
       "      <td>人工智能赋能在线实验教学行为分析</td>\n",
       "      <td>王鑫悦; 郑磊</td>\n",
       "      <td>电气电子教学学报</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>47.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>53</td>\n",
       "      <td>区域链交通大数据应用研究</td>\n",
       "      <td>任康; 沈剑; 潘江塞</td>\n",
       "      <td>粘接</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>56.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>54</td>\n",
       "      <td>人工智能背景下英语教学模式改革研究</td>\n",
       "      <td>程红梅</td>\n",
       "      <td>湖北开放职业学院学报</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>82.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>55</td>\n",
       "      <td>大数据背景下的SIOP教学模式在高职英语课堂中的运用研究</td>\n",
       "      <td>伍齐珊</td>\n",
       "      <td>湖北开放职业学院学报</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>20.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>995</th>\n",
       "      <td>1046</td>\n",
       "      <td>基于大数据视角的高职院校会计专业双创教育探讨</td>\n",
       "      <td>顾旭秋</td>\n",
       "      <td>辽宁师专学报(社会科学版)</td>\n",
       "      <td>2021-02-20</td>\n",
       "      <td>1.0</td>\n",
       "      <td>51.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>996</th>\n",
       "      <td>1047</td>\n",
       "      <td>近10年大数据研究热点演进及平台发展——以智慧教育领域1469篇文献计量分析为例</td>\n",
       "      <td>冯鑫; 李佳培; 吴晔; 徐晓婧; 郭源</td>\n",
       "      <td>中国高校科技</td>\n",
       "      <td>2021-02-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>572.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>997</th>\n",
       "      <td>1048</td>\n",
       "      <td>基于大数据技术的银行智能风控体系构建探究</td>\n",
       "      <td>陈振华</td>\n",
       "      <td>时代金融</td>\n",
       "      <td>2021-02-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>267.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>998</th>\n",
       "      <td>1049</td>\n",
       "      <td>基于Hadoop的高校校园大数据平台构建</td>\n",
       "      <td>黄少聪</td>\n",
       "      <td>安徽电子信息职业技术学院学报</td>\n",
       "      <td>2021-02-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>238.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>999</th>\n",
       "      <td>1050</td>\n",
       "      <td>基于大数据的在线学习行为研究——以青岛市崂山区在线学习平台数据为例</td>\n",
       "      <td>刘名卓; 李雪; 张泽治</td>\n",
       "      <td>数字教育</td>\n",
       "      <td>2021-02-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>131.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1000 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     Unnamed: 0                                        篇名  \\\n",
       "0            51                新工科背景下电子通信专业“人工智能技术基础”课程改革   \n",
       "1            52                          人工智能赋能在线实验教学行为分析   \n",
       "2            53                              区域链交通大数据应用研究   \n",
       "3            54                         人工智能背景下英语教学模式改革研究   \n",
       "4            55              大数据背景下的SIOP教学模式在高职英语课堂中的运用研究   \n",
       "..          ...                                       ...   \n",
       "995        1046                    基于大数据视角的高职院校会计专业双创教育探讨   \n",
       "996        1047  近10年大数据研究热点演进及平台发展——以智慧教育领域1469篇文献计量分析为例   \n",
       "997        1048                      基于大数据技术的银行智能风控体系构建探究   \n",
       "998        1049                      基于Hadoop的高校校园大数据平台构建   \n",
       "999        1050         基于大数据的在线学习行为研究——以青岛市崂山区在线学习平台数据为例   \n",
       "\n",
       "                       作者              刊名        发表时间   被引     下载  操作  \n",
       "0       张珂; 戚银城; 孔英会; 赵振兵        电气电子教学学报  2021-06-15  NaN   30.0  下载  \n",
       "1                 王鑫悦; 郑磊        电气电子教学学报  2021-06-15  NaN   47.0  下载  \n",
       "2             任康; 沈剑; 潘江塞              粘接  2021-06-15  NaN   56.0  下载  \n",
       "3                     程红梅      湖北开放职业学院学报  2021-06-15  NaN   82.0  下载  \n",
       "4                     伍齐珊      湖北开放职业学院学报  2021-06-15  NaN   20.0  下载  \n",
       "..                    ...             ...         ...  ...    ...  ..  \n",
       "995                   顾旭秋   辽宁师专学报(社会科学版)  2021-02-20  1.0   51.0  下载  \n",
       "996  冯鑫; 李佳培; 吴晔; 徐晓婧; 郭源          中国高校科技  2021-02-20  NaN  572.0  下载  \n",
       "997                   陈振华            时代金融  2021-02-20  NaN  267.0  下载  \n",
       "998                   黄少聪  安徽电子信息职业技术学院学报  2021-02-20  NaN  238.0  下载  \n",
       "999          刘名卓; 李雪; 张泽治            数字教育  2021-02-20  NaN  131.0  下载  \n",
       "\n",
       "[1000 rows x 8 columns]"
      ]
     },
     "execution_count": 305,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_url_out = pd.concat(l_df).reset_index(drop=True)\n",
    "df_url_out"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 308,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-308-b1ce047a8130>:1: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[1])\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<font class=\"Mark\">大数据</font>背景下精准营销策略研究——以A公司为例\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-308-b1ce047a8130>:12: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[2])\n",
      "<ipython-input-308-b1ce047a8130>:28: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[-1])\n",
      "<ipython-input-308-b1ce047a8130>:30: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[-2])\n",
      "<ipython-input-308-b1ce047a8130>:32: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[-1])\n",
      "<ipython-input-308-b1ce047a8130>:40: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[1])###回到链接页面\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<font class=\"Mark\">大数据</font>环境下情报学发展的十个特征\n",
      "<font class=\"Mark\">大数据</font>助力高校学风建设研究——以北京第二外国语学院为例\n",
      "<font class=\"Mark\">大数据</font>背景下高职院校治理能力提升探索\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-308-b1ce047a8130>:25: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[1])\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "基于<font class=\"Mark\">大数据</font>的高校学生学业评价体系探讨\n",
      "基于云<font class=\"Mark\">平台</font>的<font class=\"Mark\">人工智能</font>大棚种植系统关键技术研究\n",
      "构建支撑网络安全态势感知的<font class=\"Mark\">大数据平台</font>\n",
      "基于青海气象<font class=\"Mark\">大数据</font>云<font class=\"Mark\">平台</font>的数据服务接口\n",
      "高校<font class=\"Mark\">大数据平台</font>的设计及应用研究\n",
      "智慧校园<font class=\"Mark\">大数据</font>一体化<font class=\"Mark\">平台</font>设计与应用\n",
      "<font class=\"Mark\">大数据</font>时代公民个人信息法律保护存在的问题和对策\n",
      "<font class=\"Mark\">大数据</font>时代新型信访治理<font class=\"Mark\">平台</font>——“云上信访”建设浅析\n",
      "<font class=\"Mark\">大数据</font>时代互联网<font class=\"Mark\">平台</font>公开数据赋权保护的反思与法律救济进路\n",
      "基于<font class=\"Mark\">大数据</font>分析的南京市出租汽车行业发展研究\n",
      "浅析<font class=\"Mark\">大数据</font>在政府全面预算绩效管理中的应用\n",
      "<font class=\"Mark\">大数据</font>环境下企业财务管理转型\n",
      "<font class=\"Mark\">大数据</font>背景下对基层农业技术推广体系的创新分析\n",
      "基于<font class=\"Mark\">大数据</font>分析的电力系统远程运维及故障诊断\n",
      "基于Hadoop<font class=\"Mark\">平台</font>的电力行业<font class=\"Mark\">大数据</font>分析技术应用\n",
      "基于<font class=\"Mark\">人工智能</font>的计算机<font class=\"Mark\">大数据</font>安全技术<font class=\"Mark\">平台</font>构建\n",
      "疾病预警在健康<font class=\"Mark\">大数据</font>管理<font class=\"Mark\">平台</font>中的应用\n",
      "大气环境监测中<font class=\"Mark\">大数据</font>解析技术应用\n",
      "基于网络<font class=\"Mark\">大数据</font>的智能化加油站建设探析\n",
      "山东产研院发布4K<font class=\"Mark\">人工智能</font>视觉处理芯片\n",
      "基于协同理论的<font class=\"Mark\">大数据平台</font>资产数据管理研究\n",
      "基于<font class=\"Mark\">大数据</font>融合的智慧矿山<font class=\"Mark\">平台</font>搭建与应用\n",
      "基于数据挖掘技术的脑卒中ICF康复服务<font class=\"Mark\">大数据平台</font>构建研究\n",
      "基于云计算的脑卒中预防康复与护理<font class=\"Mark\">大数据平台</font>的设计与实现\n",
      "基于<font class=\"Mark\">大数据</font>的成绩数据分析<font class=\"Mark\">平台</font>设计与实现\n",
      "山东产研院发布4K<font class=\"Mark\">人工智能</font>视觉处理芯片\n",
      "基于<font class=\"Mark\">大数据</font>融合的智慧矿山<font class=\"Mark\">平台</font>搭建与应用\n",
      "基于数据挖掘技术的脑卒中ICF康复服务<font class=\"Mark\">大数据平台</font>构建研究\n",
      "基于云计算的脑卒中预防康复与护理<font class=\"Mark\">大数据平台</font>的设计与实现\n",
      "基于<font class=\"Mark\">大数据</font>的成绩数据分析<font class=\"Mark\">平台</font>设计与实现\n",
      "基于协同理论的<font class=\"Mark\">大数据平台</font>资产数据管理研究\n",
      "国家水资源监控工程的<font class=\"Mark\">大数据平台</font>建设\n",
      "融合<font class=\"Mark\">人工智能</font>语音<font class=\"Mark\">平台</font>架构设计及终端实现\n",
      "健康泰安<font class=\"Mark\">大数据平台</font>网络系统备份与恢复\n",
      "Seminar教学方法在<font class=\"Mark\">人工智能</font>课程教学中的应用研究——基于超星<font class=\"Mark\">平台</font>\n",
      "<font class=\"Mark\">人工智能</font>与制造业融合:基本内涵、发展态势与内在逻辑——兼论成都<font class=\"Mark\">人工智能</font>与制造业融合的探索与实践\n",
      "惠安县外走马埭海堤工程<font class=\"Mark\">人工智能</font>视频监测系统应用介绍\n",
      "新冠肺炎疫情防控背景下基于<font class=\"Mark\">大数据平台</font>的“轻直播式”线上教学模式的研究\n",
      "新冠肺炎疫情防控背景下基于<font class=\"Mark\">大数据平台</font>的“轻直播式”线上教学模式的研究\n",
      "<font class=\"Mark\">人工智能</font>对我国体育产业发展的推动作用\n",
      "数据可携权能否治理“<font class=\"Mark\">大数据</font>杀熟”？\n",
      "基于<font class=\"Mark\">大数据</font>视角的高职院校会计专业双创教育探讨\n",
      "近10年<font class=\"Mark\">大数据</font>研究热点演进及<font class=\"Mark\">平台</font>发展——以智慧教育领域1469篇文献计量分析为例\n",
      "基于<font class=\"Mark\">大数据</font>技术的银行智能风控体系构建探究\n",
      "基于Hadoop的高校校园<font class=\"Mark\">大数据平台</font>构建\n",
      "基于<font class=\"Mark\">大数据</font>的在线学习行为研究——以青岛市崂山区在线学习<font class=\"Mark\">平台</font>数据为例\n",
      "民航机场应急救援<font class=\"Mark\">大数据</font>处理<font class=\"Mark\">平台</font>架构研究\n",
      "基于“互联网+”的多元混合教学模式探索与实践——以“<font class=\"Mark\">大数据</font>处理技术”课程为例\n",
      "<font class=\"Mark\">AI</font>语音关键技术与业务实践研究\n",
      "基于<font class=\"Mark\">大数据</font>架构的智能交通可视化<font class=\"Mark\">平台</font>设计\n",
      "<font class=\"Mark\">人工智能</font>(<font class=\"Mark\">AI</font>)支持下的小学语文分级阅读教学策略探究\n",
      "中国移动的未来在<font class=\"Mark\">大数据</font>\n",
      "基于<font class=\"Mark\">大数据</font>提升工程设计质量的智能化审核创新质量管理\n",
      "<font class=\"Mark\">大数据</font>时代档案信息资源共享<font class=\"Mark\">平台</font>数据存储系统的设计与实现\n",
      "“互联网+”背景下TPS教学模式在<font class=\"Mark\">大数据</font>专业课程教学中的应用\n",
      "基于<font class=\"Mark\">大数据</font>的高职线上教学质量分析及“三教”改革探究\n",
      "<font class=\"Mark\">大数据</font>交易面临的机遇和挑战\n",
      "浅谈<font class=\"Mark\">大数据</font>分析技术在油田企业物资采购中的应用\n",
      "<font class=\"Mark\">人工智能</font>时代的法治图景——兼论《民法典》的智能维度\n",
      "<font class=\"Mark\">大数据</font>项目教学管理<font class=\"Mark\">平台</font>的研究\n",
      "司法<font class=\"Mark\">大数据</font>环境下的云<font class=\"Mark\">平台</font>资源池统一管理技术研究\n",
      "面向智慧校园的教育<font class=\"Mark\">大数据</font>分析关键技术\n",
      "基于新闻出版行业的<font class=\"Mark\">大数据</font>战略初探\n",
      "浅论<font class=\"Mark\">大数据</font>背景下的高校学生管理\n",
      "<font class=\"Mark\">大数据</font>时代高校工会参与职工文化建设路径研究\n",
      "湖南省精准扶贫中财务<font class=\"Mark\">大数据</font>的应用研究\n",
      "<font class=\"Mark\">大数据</font>视角下移动互联网的汽车营销策略分析\n",
      "<font class=\"Mark\">大数据</font>在构建智慧农业过程中对农业经济管理的重要影响研究\n",
      "新一代<font class=\"Mark\">人工智能</font>与广西实体经济融合发展：发展路径、重点领域与政策保障\n",
      "基于<font class=\"Mark\">大数据</font>分析的校园信息互助共享<font class=\"Mark\">平台</font>设计与开发研究\n",
      "基于地图交互的<font class=\"Mark\">大数据</font>动态可视化<font class=\"Mark\">平台</font>设计与研究\n",
      "<font class=\"Mark\">大数据</font>环境下智慧校园的设计与实现\n",
      "试谈<font class=\"Mark\">大数据</font>时代下的信息安全防护策略\n",
      "<font class=\"Mark\">人工智能</font>下“O2O”房屋租赁<font class=\"Mark\">平台</font>商业模式研究\n",
      "<font class=\"Mark\">大数据</font>环境下的企业采购成本控制策略初探\n",
      "<font class=\"Mark\">大数据</font>时代财务共享服务中心云<font class=\"Mark\">平台</font>的构建\n",
      "<font class=\"Mark\">大数据</font>在高校精准化就业服务工作中的运用\n",
      "<font class=\"Mark\">大数据</font>战略下高职院校会计专业教学改革途径探析\n",
      "<font class=\"Mark\">大数据</font>时代基于云会计的生产制造企业成本控制\n",
      "<font class=\"Mark\">大数据</font>、<font class=\"Mark\">AI平台</font>支撑下的智慧金融产品研发与实践\n",
      "<font class=\"Mark\">大数据</font>在电商供应链成本控制方面运用的研究——以京东为例\n",
      "<font class=\"Mark\">人工智能</font>技术在隧道提质升级中的应用\n",
      "<font class=\"Mark\">大数据</font>审计模式：价值、路径及趋势研究\n",
      "基于<font class=\"Mark\">大数据</font>技术的移动用户行为分析研究\n",
      "物联网+<font class=\"Mark\">大数据</font>技术在页岩气开发环保领域应用探索\n",
      "<font class=\"Mark\">大数据</font>应用云<font class=\"Mark\">平台</font>在重大疫情防控中的应用\n",
      "<font class=\"Mark\">大数据</font>技术在高校计算机信息教学中的研究\n",
      "中医药<font class=\"Mark\">大数据</font>云服务<font class=\"Mark\">平台</font>的医疗数据安全隐私保护设计\n",
      "<font class=\"Mark\">人工智能</font>时代政治谣言的算法治理:逻辑、限度与应对\n",
      "专题导读  应对疫情防控新挑战的<font class=\"Mark\">大数据</font>研究与开发\n",
      "<font class=\"Mark\">大数据</font>服务<font class=\"Mark\">平台</font>下的应急响应物资调度优化研究\n",
      "大胜达首位<font class=\"Mark\">AI</font>包装设计师诞生\n",
      "基于Hadoop的高速公路工程<font class=\"Mark\">大数据平台</font>搭建\n",
      "<font class=\"Mark\">AI</font>可视化智能消防综合信息<font class=\"Mark\">平台</font>发展研究\n",
      "基于层次化体系的武器系统<font class=\"Mark\">大数据</font>管理研究\n",
      "超图：打造<font class=\"Mark\">大数据</font>大<font class=\"Mark\">平台</font>  助力人民战“疫”\n",
      "<font class=\"Mark\">大数据</font>背景下基层党组织建设的困境与出路\n",
      "甘肃省<font class=\"Mark\">大数据</font>推动政府社会治理体系与治理能力现代化中的问题与对策研究\n",
      "智慧牧场<font class=\"Mark\">大数据</font>信息系统的研究与应用\n",
      "“<font class=\"Mark\">大数据</font>”<font class=\"Mark\">平台</font>用户持续参与网络社区价值共创的动因及中介机制研究\n",
      "<font class=\"Mark\">大数据</font>云计算技术在电商营销中的应用\n",
      "市场监管<font class=\"Mark\">大数据</font>归集系统建设研究\n",
      "基于企业<font class=\"Mark\">大数据</font>的邮政智慧型营销体系的建设与实践\n",
      "大型国有建筑施工企业<font class=\"Mark\">大数据</font>应用的几点思考\n",
      "数据库技术与<font class=\"Mark\">人工智能</font>的融合\n"
     ]
    },
    {
     "ename": "ElementClickInterceptedException",
     "evalue": "Message: element click intercepted: Element is not clickable at point (85, 628)\n  (Session info: chrome=91.0.4472.124)\n",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mElementClickInterceptedException\u001b[0m          Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-308-b1ce047a8130>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m      9\u001b[0m         \u001b[0melement\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdriver\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfind_elements_by_xpath\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'//td[@class=\"name\"]//a'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mart\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     10\u001b[0m         \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0melement\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_attribute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'innerHTML'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 11\u001b[1;33m         \u001b[0melement\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mclick\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     12\u001b[0m         \u001b[0mdriver\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mswitch_to_window\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdriver\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwindow_handles\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m2\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     13\u001b[0m         \u001b[0msleep\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m5\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mD:\\anaconda\\lib\\site-packages\\selenium\\webdriver\\remote\\webelement.py\u001b[0m in \u001b[0;36mclick\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m     78\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0mclick\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     79\u001b[0m         \u001b[1;34m\"\"\"Clicks the element.\"\"\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 80\u001b[1;33m         \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_execute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mCommand\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mCLICK_ELEMENT\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     81\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     82\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0msubmit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mD:\\anaconda\\lib\\site-packages\\selenium\\webdriver\\remote\\webelement.py\u001b[0m in \u001b[0;36m_execute\u001b[1;34m(self, command, params)\u001b[0m\n\u001b[0;32m    631\u001b[0m             \u001b[0mparams\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m{\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    632\u001b[0m         \u001b[0mparams\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'id'\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_id\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 633\u001b[1;33m         \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_parent\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcommand\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    634\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    635\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0mfind_element\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mby\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mBy\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mID\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mD:\\anaconda\\lib\\site-packages\\selenium\\webdriver\\remote\\webdriver.py\u001b[0m in \u001b[0;36mexecute\u001b[1;34m(self, driver_command, params)\u001b[0m\n\u001b[0;32m    319\u001b[0m         \u001b[0mresponse\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcommand_executor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdriver_command\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    320\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0mresponse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 321\u001b[1;33m             \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0merror_handler\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcheck_response\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mresponse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    322\u001b[0m             response['value'] = self._unwrap_value(\n\u001b[0;32m    323\u001b[0m                 response.get('value', None))\n",
      "\u001b[1;32mD:\\anaconda\\lib\\site-packages\\selenium\\webdriver\\remote\\errorhandler.py\u001b[0m in \u001b[0;36mcheck_response\u001b[1;34m(self, response)\u001b[0m\n\u001b[0;32m    240\u001b[0m                 \u001b[0malert_text\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'alert'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'text'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    241\u001b[0m             \u001b[1;32mraise\u001b[0m \u001b[0mexception_class\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmessage\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mscreen\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstacktrace\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0malert_text\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 242\u001b[1;33m         \u001b[1;32mraise\u001b[0m \u001b[0mexception_class\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmessage\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mscreen\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstacktrace\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    243\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    244\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0m_value_or_default\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mobj\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdefault\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mElementClickInterceptedException\u001b[0m: Message: element click intercepted: Element is not clickable at point (85, 628)\n  (Session info: chrome=91.0.4472.124)\n"
     ]
    }
   ],
   "source": [
    "# Walk the result list page by page: open each article's detail window, press\n",
    "# its PDF download button, and handle the captcha page when no file arrives.\n",
    "from selenium.common.exceptions import ElementClickInterceptedException\n",
    "\n",
    "download_dir = r\"D:\\数据挖掘pdf\"  # browser download folder (change to your own)\n",
    "article_link_xpath = '//td[@class=\"name\"]//a'\n",
    "# First value of the page counter; the loop does not navigate to it, it starts\n",
    "# on whatever result page is currently open.\n",
    "start_page = 5\n",
    "\n",
    "\n",
    "def click_with_fallback(target):\n",
    "    \"\"\"Click `target`; fall back to a JavaScript click if the native one is intercepted.\"\"\"\n",
    "    try:\n",
    "        target.click()\n",
    "    except ElementClickInterceptedException:\n",
    "        # Something covers the click point: scroll the element into view and\n",
    "        # dispatch the click from JavaScript instead.\n",
    "        driver.execute_script(\"arguments[0].scrollIntoView({block: 'center'});\", target)\n",
    "        driver.execute_script(\"arguments[0].click();\", target)\n",
    "\n",
    "\n",
    "list_handle = driver.window_handles[1]  # window showing the result list\n",
    "driver.switch_to.window(list_handle)\n",
    "total_label = driver.find_element(By.XPATH, '//span[@class=\"total\"]')\n",
    "max_page = int(total_label.get_attribute(\"textContent\").replace(\"共\", \"\").replace(\"页\", \"\"))\n",
    "# NOTE(review): range() stops before max_page, so the body never runs for the\n",
    "# final counter value - confirm the last result page is not being skipped.\n",
    "for page in range(start_page, max_page):\n",
    "    article_count = len(driver.find_elements(By.XPATH, article_link_xpath))\n",
    "    sleep(5)\n",
    "    for art in range(article_count):\n",
    "        # Look the link up again by position on every pass\n",
    "        link = driver.find_elements(By.XPATH, article_link_xpath)[art]\n",
    "        print(link.get_attribute('innerHTML'))\n",
    "        handles_before = driver.window_handles\n",
    "        click_with_fallback(link)\n",
    "        # Wait for the detail window instead of assuming it is window_handles[2]\n",
    "        WebDriverWait(driver, 10).until(EC.new_window_is_opened(handles_before))\n",
    "        detail_handle = [h for h in driver.window_handles if h not in handles_before][-1]\n",
    "        driver.switch_to.window(detail_handle)\n",
    "        sleep(5)\n",
    "        files_before = len(os.listdir(download_dir))\n",
    "        pdf_button = driver.find_element(By.XPATH, '//*[@id=\"pdfDown\"]')\n",
    "        driver.execute_script(\"arguments[0].click();\", pdf_button)\n",
    "        sleep(10)\n",
    "        files_after = len(os.listdir(download_dir))\n",
    "        if files_after > files_before:\n",
    "            # A new file showed up in the download folder: close the detail window\n",
    "            driver.close()\n",
    "            sleep(4)\n",
    "            driver.switch_to.window(list_handle)\n",
    "        else:\n",
    "            # No new file: hop between the two newest windows, ending on the\n",
    "            # newest one, then check whether it is showing a captcha image.\n",
    "            driver.switch_to.window(driver.window_handles[-1])\n",
    "            sleep(3)\n",
    "            driver.switch_to.window(driver.window_handles[-2])\n",
    "            sleep(3)\n",
    "            driver.switch_to.window(driver.window_handles[-1])\n",
    "            sleep(3)\n",
    "            if driver.find_elements(By.XPATH, '//*[@id=\"vImg\"]'):\n",
    "                try:\n",
    "                    verification_code()\n",
    "                except Exception as exc:\n",
    "                    # Keep the crawl going as before, but report the failure\n",
    "                    print(f\"captcha handling failed: {exc!r}\")\n",
    "                    driver.close()\n",
    "            else:\n",
    "                # No captcha either: close this window and move on\n",
    "                driver.close()\n",
    "            # Always continue from the result list.\n",
    "            # NOTE(review): verification_code() is defined in another cell; it is assumed\n",
    "            # not to need the detail window after it returns - confirm.\n",
    "            driver.switch_to.window(list_handle)\n",
    "    click_with_fallback(driver.find_element(By.XPATH, '//*[@id=\"PageNext\"]'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 下载refworks文件及原文"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 309,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]\n"
     ]
    }
   ],
   "source": [
    "# Export the Refworks file (.txt) and download the articles.\n",
    "# One 'select all' may not cover more than 500 articles, so the work is split into 3 rounds.\n",
    "\n",
    "pages = [*range(1, 11)]\n",
    "print(pages)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 310,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Return to the first result page\n",
    "driver.find_element(By.ID, 'total').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 311,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Clear the current selection\n",
    "driver.find_element(By.XPATH, '//*[@id=\"gridTable\"]/div[1]/div[2]/div[1]/a').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 312,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Select the articles on the current page, then turn to the next page\n",
    "def process_choose(pages):\n",
    "    \"\"\"Tick 'select all' and click 'next page' once per entry of `pages`.\n",
    "\n",
    "    The entries are only printed as progress labels; the function never\n",
    "    navigates to them and simply starts from the page the browser shows.\n",
    "    \"\"\"\n",
    "    for p in pages:\n",
    "        print(p, end='\\t')\n",
    "        driver.find_element(By.ID, 'selectCheckAll1').click()\n",
    "        # Random pause of up to 20 seconds before turning the page\n",
    "        time.sleep(20 * random())\n",
    "        driver.find_element(By.ID, 'PageNext').click()\n",
    "        time.sleep(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 313,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1\t2\t3\t4\t5\t6\t7\t8\t9\t10\t"
     ]
    }
   ],
   "source": [
    "# First round: select all records page by page\n",
    "process_choose(pages)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 314,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Click 'Export & Analysis'\n",
    "driver.find_element(By.XPATH, '//i[@class=\"icon-d\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 315,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Click 'Export references'\n",
    "driver.find_element(By.XPATH, '//i[@class=\"icon-r\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 316,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Click the Refworks entry\n",
    "driver.find_element(By.XPATH, '//a[@exporttype=\"Refworks\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 317,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['CDwindow-21A00034E7171FEEA7C000AFB02314D6',\n",
       " 'CDwindow-7597892CF395BDC33E8BBED3D045389B',\n",
       " 'CDwindow-9A513AB3389E8B717235188E209C4B6E']"
      ]
     },
     "execution_count": 317,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Handles of all open windows\n",
    "driver.window_handles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 318,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-318-520070efe65b>:2: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[2])\n"
     ]
    }
   ],
   "source": [
    "# Switch to the export window: the last handle in the list, where the newly opened window appears\n",
    "driver.switch_to.window(driver.window_handles[-1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 319,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Export as a .txt file\n",
    "driver.find_element(By.XPATH, '//i[@class=\"icon icon-export\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 320,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-320-0188c2a7ff70>:2: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[1])\n"
     ]
    }
   ],
   "source": [
    "# Switch back to the result-list window\n",
    "driver.switch_to.window(driver.window_handles[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 321,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Clear the current selection\n",
    "driver.find_element(By.XPATH, '//*[@id=\"gridTable\"]/div[1]/div[2]/div[1]/a').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 322,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[10, 11, 12, 13, 14, 15, 16]\n"
     ]
    }
   ],
   "source": [
    "# Second download round\n",
    "# NOTE(review): process_choose() starts from the page currently shown, so these numbers\n",
    "# are only progress labels - confirm they match the pages actually selected.\n",
    "pages = [*range(10, 17)]\n",
    "print(pages)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 324,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "10\t11\t12\t13\t14\t15\t16\t"
     ]
    }
   ],
   "source": [
    "# Second round: select all records page by page\n",
    "process_choose(pages)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 325,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Click 'Export & Analysis'\n",
    "driver.find_element(By.XPATH, '//i[@class=\"icon-d\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 326,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Click 'Export references'\n",
    "driver.find_element(By.XPATH, '//i[@class=\"icon-r\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 327,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Click the Refworks entry\n",
    "driver.find_element(By.XPATH, '//a[@exporttype=\"Refworks\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 328,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['CDwindow-21A00034E7171FEEA7C000AFB02314D6',\n",
       " 'CDwindow-7597892CF395BDC33E8BBED3D045389B',\n",
       " 'CDwindow-9A513AB3389E8B717235188E209C4B6E',\n",
       " 'CDwindow-606A2C28EF2AF336DAF895865F47871D']"
      ]
     },
     "execution_count": 328,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Handles of all open windows\n",
    "driver.window_handles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 329,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-329-1f3bb34cc9cb>:2: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[3])\n"
     ]
    }
   ],
   "source": [
    "# Switch to the export window: the last handle in the list, where the newly opened window appears\n",
    "driver.switch_to.window(driver.window_handles[-1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 330,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Export as a .txt file\n",
    "driver.find_element(By.XPATH, '//i[@class=\"icon icon-export\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 331,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-331-0188c2a7ff70>:2: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[1])\n"
     ]
    }
   ],
   "source": [
    "# Switch back to the result-list window\n",
    "driver.switch_to.window(driver.window_handles[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 332,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Clear the current selection\n",
    "driver.find_element(By.XPATH, '//*[@id=\"gridTable\"]/div[1]/div[2]/div[1]/a').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 333,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[17, 18, 19, 20]\n"
     ]
    }
   ],
   "source": [
    "# Third download round\n",
    "# NOTE(review): process_choose() starts from the page currently shown, so these numbers\n",
    "# are only progress labels - confirm they match the pages actually selected.\n",
    "pages = [*range(17, 21)]\n",
    "print(pages)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 334,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "17\t18\t19\t20\t"
     ]
    }
   ],
   "source": [
    "# Third round: select all records page by page\n",
    "process_choose(pages)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 335,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Click 'Export & Analysis'\n",
    "driver.find_element(By.XPATH, '//i[@class=\"icon-d\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 336,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Click 'Export references'\n",
    "driver.find_element(By.XPATH, '//i[@class=\"icon-r\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 337,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Click the Refworks entry\n",
    "driver.find_element(By.XPATH, '//a[@exporttype=\"Refworks\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 338,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['CDwindow-21A00034E7171FEEA7C000AFB02314D6',\n",
       " 'CDwindow-7597892CF395BDC33E8BBED3D045389B',\n",
       " 'CDwindow-9A513AB3389E8B717235188E209C4B6E',\n",
       " 'CDwindow-606A2C28EF2AF336DAF895865F47871D',\n",
       " 'CDwindow-57A49AAB67F54CF25B41E0C5CDA9F2AB']"
      ]
     },
     "execution_count": 338,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Handles of all open windows\n",
    "driver.window_handles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 339,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-339-1f3bb34cc9cb>:2: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[3])\n"
     ]
    }
   ],
   "source": [
    "# Switch to the export window opened in this round. Five windows are open at this\n",
    "# point, so a fixed index of 3 would select the previous round's export window;\n",
    "# take the last handle in the list instead.\n",
    "driver.switch_to.window(driver.window_handles[-1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 340,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Export as a .txt file\n",
    "driver.find_element(By.XPATH, '//i[@class=\"icon icon-export\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 341,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-341-0188c2a7ff70>:2: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[1])\n"
     ]
    }
   ],
   "source": [
    "# Switch back to the result-list window\n",
    "driver.switch_to.window(driver.window_handles[1])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
